From eccef6011eff120d82c08b9acfb712b0bc9a6c08 Mon Sep 17 00:00:00 2001 From: Dhruv Vats Date: Fri, 8 May 2026 17:31:33 +0530 Subject: [PATCH 1/7] introduce synchronized_data_batch --- include/cucascade/data/data_batch.hpp | 453 ++++++--------------- include/cucascade/data/data_repository.hpp | 7 +- src/data/data_batch.cpp | 248 ++++------- test/data/test_data_repository.cpp | 20 +- 4 files changed, 237 insertions(+), 491 deletions(-) diff --git a/include/cucascade/data/data_batch.hpp b/include/cucascade/data/data_batch.hpp index 7f94d77..0868b7a 100644 --- a/include/cucascade/data/data_batch.hpp +++ b/include/cucascade/data/data_batch.hpp @@ -22,12 +22,12 @@ #include #include +#include #include #include #include #include #include -#include #include namespace cucascade { @@ -47,9 +47,10 @@ namespace cucascade { */ enum class batch_state { idle, read_only, mutable_locked }; -// Forward declarations -- required before data_batch because it friends them. +// Forward declarations -- required before data_batch because it be-friends them. class read_only_data_batch; class mutable_data_batch; +class synchronized_data_batch; /** * @brief Core data batch type representing the "idle" (unlocked) state. @@ -59,35 +60,26 @@ class mutable_data_batch; * and can only be reached through RAII accessor types that hold the appropriate lock. * * State transitions are static methods that move ownership of the accessor, - * making the source null at the call site. This provides compile-time enforcement: + * making the source null at the call site. This provides compile-time enforcement:1 * once a batch is locked, the caller cannot access the idle handle. * * @note Non-copyable and non-movable. The object itself never moves; only the * smart pointer to it is transferred between states. */ -class data_batch : public std::enable_shared_from_this { - public: - // -- Construction -- - - /** - * @brief Construct a new data_batch. - * - * @param batch_id Unique identifier for this batch (immutable after construction). - * @param data Owned data representation; must not be null. - */ - data_batch(uint64_t batch_id, std::unique_ptr data); +class data_batch { + friend synchronized_data_batch; + friend read_only_data_batch; + friend mutable_data_batch; - /** @brief Default destructor. */ + public: ~data_batch() = default; - // -- Deleted move/copy (D-04/CORE-07) -- + // -- Deleted move/copy -- data_batch(data_batch&&) = delete; data_batch& operator=(data_batch&&) = delete; data_batch(const data_batch&) = delete; data_batch& operator=(const data_batch&) = delete; - // -- Lock-free public API -- - /** * @brief Get the unique batch identifier. * @@ -97,80 +89,75 @@ class data_batch : public std::enable_shared_from_this { */ uint64_t get_batch_id() const; - /** - * @brief Increment the subscriber interest count. - */ - void subscribe(); + // Only friend accessor classes can call these methods. /** - * @brief Decrement the subscriber interest count. - * - * Atomic, lock-free. - * - * @throws std::runtime_error if subscriber count is already zero. + * @brief Get the memory tier of the held data. + * @return The current memory tier. */ - void unsubscribe(); + memory::Tier get_current_tier() const; /** - * @brief Get the current subscriber count. - * - * Atomic, lock-free. - * - * @return The number of active subscribers. + * @brief Get a raw pointer to the data representation. + * @return Non-owning pointer to the data, or nullptr if empty. */ - size_t get_subscriber_count() const; + idata_representation* get_data() const; /** - * @brief Get the observable lock state of this batch. - * - * Atomic, lock-free. Returns the current state: idle, read_only, or - * mutable_locked. Updated during every state transition. - * - * @return The current batch_state. + * @brief Get a raw pointer to the memory space. + * @return Non-owning pointer to the memory space, or nullptr if data is null. */ - batch_state get_state() const { return _state.load(std::memory_order_relaxed); } + memory::memory_space* get_memory_space() const; - /** - * @brief Get the number of active read_only_data_batch instances holding this batch. - * - * Atomic, lock-free. Counts concurrent shared-lock holders. Transitions to zero - * when the last read_only_data_batch is destroyed (or moved-from). - * - * @return The current reader count. - */ - size_t get_read_only_count() const { return _read_only_count.load(std::memory_order_acquire); } + template + void convert_to(representation_converter_registry& registry, + const memory::memory_space* target_memory_space, + rmm::cuda_stream_view stream) + { + auto new_representation = + registry.convert(*_data, target_memory_space, stream); + auto old_representation = std::move(_data); + _data = std::move(new_representation); + + bool needs_sync = (old_representation != nullptr && + old_representation->get_current_tier() == memory::Tier::GPU) || + _data->get_current_tier() == memory::Tier::GPU; + + if (needs_sync) { + // Conversions involving GPU may enqueue async operations on the provided + // stream that read from the source memory. Synchronize before the old + // representation is destroyed to avoid use-after-free. + stream.synchronize(); + } + } - // -- Static transition methods (D-13/D-14/D-15/D-17) -- + private: + data_batch(uint64_t batch_id, std::unique_ptr data); - /** - * @brief Transition from read-only back to idle (release shared lock). - * - * @param accessor Rvalue reference to the read-only accessor (consumed). - * @return The batch pointer, now in idle state. - */ - [[nodiscard]] static std::shared_ptr to_idle(read_only_data_batch&& accessor); + const uint64_t _batch_id; ///< Immutable batch identifier + std::unique_ptr _data; ///< Owned data representation +}; - /** - * @brief Transition from mutable back to idle (release exclusive lock). - * - * @param accessor Rvalue reference to the mutable accessor (consumed). - * @return The batch pointer, now in idle state. - */ - [[nodiscard]] static std::shared_ptr to_idle(mutable_data_batch&& accessor); +/** + * @brief Synchronized data batch type allowing thread safe access to the code data batch + * via RAII accessers. + */ +class synchronized_data_batch : std::enable_shared_from_this { + friend read_only_data_batch; + friend mutable_data_batch; - // -- Non-static transitions (via shared_from_this) -- - // The caller's shared_ptr is NOT consumed. These only work when the - // data_batch is managed by a shared_ptr (throws bad_weak_ptr otherwise). + public: + static std::shared_ptr make(uint64_t batch_id, + std::unique_ptr data); /** * @brief Transition from idle to read-only (shared lock) without consuming the caller's pointer. * - * Uses shared_from_this() to obtain a new shared_ptr. Blocks until the - * shared lock is acquired. + * Blocks until the shared lock is acquired. * * @return A read_only_data_batch holding the shared lock. */ - [[nodiscard]] read_only_data_batch to_read_only(); + [[nodiscard]] read_only_data_batch get_read_only(); /** * @brief Transition from idle to mutable (exclusive lock) without consuming the caller's pointer. @@ -180,7 +167,7 @@ class data_batch : public std::enable_shared_from_this { * * @return A mutable_data_batch holding the exclusive lock. */ - [[nodiscard]] mutable_data_batch to_mutable(); + [[nodiscard]] mutable_data_batch get_mutable(); /** * @brief Try to transition from idle to read-only (non-blocking). @@ -188,7 +175,7 @@ class data_batch : public std::enable_shared_from_this { * @return An optional containing the read-only accessor on success, or * std::nullopt if the lock could not be acquired immediately. */ - [[nodiscard]] std::optional try_to_read_only(); + [[nodiscard]] std::optional try_get_read_only(); /** * @brief Try to transition from idle to mutable (non-blocking). @@ -196,70 +183,44 @@ class data_batch : public std::enable_shared_from_this { * @return An optional containing the mutable accessor on success, or * std::nullopt if the lock could not be acquired immediately. */ - [[nodiscard]] std::optional try_to_mutable(); - - // -- Locked-to-locked static transitions -- + [[nodiscard]] std::optional try_get_mutable(); /** - * @brief Transition from read-only to mutable (upgrade lock). - * - * Releases the shared lock, then acquires an exclusive lock (may block). - * The source accessor is consumed via move. - * NOTE: The transition is not atomic. - * - * @param accessor Rvalue reference to the read-only accessor (consumed). - * @return A mutable_data_batch holding the exclusive lock. + * @brief Increment the subscriber interest count. */ - [[nodiscard]] static mutable_data_batch readonly_to_mutable(read_only_data_batch&& accessor); + void subscribe(); /** - * @brief Transition from mutable to read-only (downgrade lock). + * @brief Decrement the subscriber interest count. * - * Releases the exclusive lock, then acquires a shared lock (may block). - * The source accessor is consumed via move. - * NOTE: The transition is not atomic. + * Atomic, lock-free. * - * @param accessor Rvalue reference to the mutable accessor (consumed). - * @return A read_only_data_batch holding the shared lock. + * @throws std::runtime_error if subscriber count is already zero. */ - [[nodiscard]] static read_only_data_batch mutable_to_readonly(mutable_data_batch&& accessor); - - private: - // -- Friend declarations (D-24/REPO-04) -- - friend class read_only_data_batch; - friend class mutable_data_batch; - - // -- Private data accessors (D-23/CORE-02) -- - // Only friend accessor classes can call these methods. + void unsubscribe(); - /** - * @brief Get the memory tier of the held data. - * @return The current memory tier. - */ - memory::Tier get_current_tier() const; + size_t get_subscriber_count() const; - /** - * @brief Get a raw pointer to the data representation. - * @return Non-owning pointer to the data, or nullptr if empty. - */ - idata_representation* get_data() const; + template + std::shared_ptr clone_to(representation_converter_registry& registry, + uint64_t new_batch_id, + const memory::memory_space* target_memory_space, + rmm::cuda_stream_view stream) const + { + auto new_representation = + registry.convert(_batch._data, target_memory_space, stream); + return make(new_batch_id, std::move(new_representation)); + } - /** - * @brief Get a raw pointer to the memory space. - * @return Non-owning pointer to the memory space, or nullptr if data is null. - */ - memory::memory_space* get_memory_space() const; + private: + synchronized_data_batch(uint64_t batch_id, std::unique_ptr data) + : _batch(data_batch(batch_id, std::move(data))) + { + } - /** - * @brief Replace the data representation. - * @param data New data representation (takes ownership). - */ - void set_data(std::unique_ptr data); + data_batch _batch; + mutable std::shared_mutex _rw_mutex; - // -- Members -- - const uint64_t _batch_id; ///< Immutable batch identifier - std::unique_ptr _data; ///< Owned data representation - mutable std::shared_mutex _rw_mutex; ///< Reader-writer mutex std::atomic _subscriber_count{0}; ///< Atomic subscriber interest count std::atomic _state{batch_state::idle}; ///< Observable lock state std::atomic _read_only_count{0}; ///< Count of active read_only_data_batch instances @@ -278,83 +239,55 @@ class data_batch : public std::enable_shared_from_this { * is destroyed, moved-from, or overwritten by assignment. */ class read_only_data_batch { - public: - // -- Named accessor methods (D-09/ACC-01) -- - - /** @brief Get the batch identifier. */ - uint64_t get_batch_id() const { return _batch->get_batch_id(); } - - /** @brief Get the memory tier of the held data. */ - memory::Tier get_current_tier() const { return _batch->get_current_tier(); } + friend class synchronized_data_batch; - /** @brief Get a raw pointer to the data representation. */ - idata_representation* get_data() const { return _batch->get_data(); } - - /** @brief Get a raw pointer to the memory space. */ - memory::memory_space* get_memory_space() const { return _batch->get_memory_space(); } - - // -- Clone operations (D-18/D-19/D-20/CLONE-01/CLONE-02) -- - - /** - * @brief Create an independent deep copy of the batch data. - * - * The clone has a new batch ID and its own copy of the data representation, - * residing in the same memory space as the original. - * - * @param new_batch_id Batch ID for the cloned batch. - * @param stream CUDA stream for memory operations. - * @return A new data_batch wrapped in shared_ptr. - * @throws std::runtime_error if the data is null. - */ - [[nodiscard]] std::shared_ptr clone(uint64_t new_batch_id, - rmm::cuda_stream_view stream) const; - - /** - * @brief Create an independent deep copy with representation conversion. - * - * The clone has a new batch ID and its data is converted to TargetRepresentation - * using the provided converter registry. - * - * @tparam TargetRepresentation Target representation type. - * @param registry Converter registry for type-keyed dispatch. - * @param new_batch_id Batch ID for the cloned batch. - * @param target_memory_space Target memory space for the converted data. - * @param stream CUDA stream for memory operations. - * @return A new data_batch wrapped in shared_ptr. - */ - template - [[nodiscard]] std::shared_ptr clone_to( - representation_converter_registry& registry, - uint64_t new_batch_id, - const memory::memory_space* target_memory_space, - rmm::cuda_stream_view stream) const; + public: + ~read_only_data_batch(); - // -- Move support -- + // -- Move-only -- read_only_data_batch(read_only_data_batch&& other) noexcept; read_only_data_batch& operator=(read_only_data_batch&& other) noexcept; - ~read_only_data_batch(); + read_only_data_batch(const read_only_data_batch& other) = delete; + read_only_data_batch& operator=(const read_only_data_batch& other) = delete; + + // will allow only read access + const data_batch* operator->() const { return &(_owner->_batch); } + const data_batch& operator*() const { return _owner->_batch; } + + static std::shared_ptr to_idle(read_only_data_batch&& accessor) + { + std::shared_ptr ptr = accessor._owner; + { + auto _ = std::move(accessor); + } // destroy accessor, releasing shared lock + return ptr; + } - // -- Copy support: acquires a new shared lock, increments reader count -- - read_only_data_batch(const read_only_data_batch& other); - read_only_data_batch& operator=(const read_only_data_batch& other); + read_only_data_batch clone_read_only_access() const + { + std::optional maybe_clone = _owner->try_get_read_only(); - private: - friend class data_batch; + // the shared_mutex is already locked under shared access because + // the current read_only_data_batch exists. + assert(maybe_clone.has_value()); + return std::move(*maybe_clone); + }; + private: /** * @brief Private constructor -- only data_batch methods can create instances. * * @param parent Shared pointer to the parent data_batch (moved in). * @param lock Shared lock already acquired on the parent's mutex. */ - read_only_data_batch(std::shared_ptr parent, + read_only_data_batch(std::shared_ptr parent, std::shared_lock lock); // INVARIANT: _batch must be declared before _lock -- destruction order is load-bearing. // When destroyed, _lock releases the shared lock first, then _batch drops the parent // reference. This prevents accessing a destroyed mutex. - std::shared_ptr _batch; ///< Parent lifetime (destroyed second) - std::shared_lock _lock; ///< Shared lock (destroyed first) + std::shared_ptr _owner; ///< Parent lifetime (destroyed second) + std::shared_lock _lock; ///< Shared lock (destroyed first) }; /** @@ -367,162 +300,44 @@ class read_only_data_batch { * Move-only. The exclusive lock is released when this object is destroyed or moved-from. */ class mutable_data_batch { - public: - // -- Read methods (same as read_only, ACC-02) -- - - /** @brief Get the batch identifier. */ - uint64_t get_batch_id() const { return _batch->get_batch_id(); } - - /** @brief Get the memory tier of the held data. */ - memory::Tier get_current_tier() const { return _batch->get_current_tier(); } - - /** @brief Get a raw pointer to the data representation. */ - idata_representation* get_data() const { return _batch->get_data(); } + friend class synchronized_data_batch; - /** @brief Get a raw pointer to the memory space. */ - memory::memory_space* get_memory_space() const { return _batch->get_memory_space(); } - - // -- Write methods (D-10/ACC-02) -- - - /** - * @brief Replace the data representation. - * @param data New data representation (takes ownership). - */ - void set_data(std::unique_ptr data) { _batch->set_data(std::move(data)); } - - /** - * @brief Convert the data representation in-place. - * - * Replaces the held data with a new representation produced by the converter - * registry. If the conversion involves the GPU tier, synchronizes the stream - * before the old representation is destroyed to prevent use-after-free. - * - * @tparam TargetRepresentation Target representation type. - * @param registry Converter registry for type-keyed dispatch. - * @param target_memory_space Target memory space for the new representation. - * @param stream CUDA stream for memory operations. - */ - template - void convert_to(representation_converter_registry& registry, - const memory::memory_space* target_memory_space, - rmm::cuda_stream_view stream); - - // -- Clone operations (CLONE-01/CLONE-02) -- - - /** - * @brief Create an independent deep copy of the batch data. - * - * The clone has a new batch ID and its own copy of the data representation, - * residing in the same memory space as the original. - * - * @param new_batch_id Batch ID for the cloned batch. - * @param stream CUDA stream for memory operations. - * @return A new data_batch wrapped in shared_ptr. - * @throws std::runtime_error if the data is null. - */ - [[nodiscard]] std::shared_ptr clone(uint64_t new_batch_id, - rmm::cuda_stream_view stream) const; - - /** - * @brief Create an independent deep copy with representation conversion. - * - * The clone has a new batch ID and its data is converted to TargetRepresentation - * using the provided converter registry. - * - * @tparam TargetRepresentation Target representation type. - * @param registry Converter registry for type-keyed dispatch. - * @param new_batch_id Batch ID for the cloned batch. - * @param target_memory_space Target memory space for the converted data. - * @param stream CUDA stream for memory operations. - * @return A new data_batch wrapped in shared_ptr. - */ - template - [[nodiscard]] std::shared_ptr clone_to( - representation_converter_registry& registry, - uint64_t new_batch_id, - const memory::memory_space* target_memory_space, - rmm::cuda_stream_view stream) const; + public: + ~mutable_data_batch(); // -- Move-only -- mutable_data_batch(mutable_data_batch&& other) noexcept; mutable_data_batch& operator=(mutable_data_batch&& other) noexcept; - ~mutable_data_batch(); mutable_data_batch(const mutable_data_batch&) = delete; mutable_data_batch& operator=(const mutable_data_batch&) = delete; - private: - friend class data_batch; + data_batch* operator->() { return &(_owner->_batch); } + data_batch& operator*() { return _owner->_batch; } + static std::shared_ptr to_idle(mutable_data_batch&& accessor) + { + std::shared_ptr ptr = accessor._owner; + { + auto _ = std::move(accessor); + } // destroy accessor, releasing shared lock + return ptr; + } + + private: /** * @brief Private constructor -- only data_batch methods can create instances. * * @param parent Shared pointer to the parent data_batch (moved in). * @param lock Exclusive lock already acquired on the parent's mutex. */ - mutable_data_batch(std::shared_ptr parent, + mutable_data_batch(std::shared_ptr parent, std::unique_lock lock); // INVARIANT: _batch must be declared before _lock -- destruction order is load-bearing. // When destroyed, _lock releases the exclusive lock first, then _batch drops the parent // reference. This prevents accessing a destroyed mutex. - std::shared_ptr _batch; ///< Parent lifetime (destroyed second) - std::unique_lock _lock; ///< Exclusive lock (destroyed first) + std::shared_ptr _owner; ///< Parent lifetime (destroyed second) + std::unique_lock _lock; ///< Exclusive lock (destroyed first) }; -// ============================================================================= -// Template implementations (TargetRepresentation-templated methods only) -// ============================================================================= - -// -- read_only_data_batch::clone_to (deep copy + conversion, CLONE-02) -- - -template -std::shared_ptr read_only_data_batch::clone_to( - representation_converter_registry& registry, - uint64_t new_batch_id, - const memory::memory_space* target_memory_space, - rmm::cuda_stream_view stream) const -{ - auto new_representation = - registry.convert(*_batch->_data, target_memory_space, stream); - return std::make_shared(new_batch_id, std::move(new_representation)); -} - -// -- mutable_data_batch::convert_to (in-place conversion, ACC-02) -- - -template -void mutable_data_batch::convert_to(representation_converter_registry& registry, - const memory::memory_space* target_memory_space, - rmm::cuda_stream_view stream) -{ - auto new_representation = - registry.convert(*_batch->_data, target_memory_space, stream); - auto old_representation = std::move(_batch->_data); - _batch->_data = std::move(new_representation); - - bool needs_sync = - old_representation != nullptr && (old_representation->get_current_tier() == memory::Tier::GPU || - _batch->_data->get_current_tier() == memory::Tier::GPU); - - if (needs_sync) { - // Conversions involving GPU may enqueue async operations on the provided - // stream that read from the source memory. Synchronize before the old - // representation is destroyed to avoid use-after-free. - stream.synchronize(); - } -} - -// -- mutable_data_batch::clone_to (deep copy + conversion, CLONE-02) -- - -template -std::shared_ptr mutable_data_batch::clone_to( - representation_converter_registry& registry, - uint64_t new_batch_id, - const memory::memory_space* target_memory_space, - rmm::cuda_stream_view stream) const -{ - auto new_representation = - registry.convert(*_batch->_data, target_memory_space, stream); - return std::make_shared(new_batch_id, std::move(new_representation)); -} - } // namespace cucascade diff --git a/include/cucascade/data/data_repository.hpp b/include/cucascade/data/data_repository.hpp index df4fb45..56ff768 100644 --- a/include/cucascade/data/data_repository.hpp +++ b/include/cucascade/data/data_repository.hpp @@ -84,10 +84,9 @@ class idata_repository { _data_batches.resize(partition_idx + 1); } _data_batches[partition_idx].push_back(std::move(batch)); - } + } } - /** * @brief Remove and return the next data batch from the repository. * @@ -268,12 +267,12 @@ class idata_repository { } protected: - mutable std::mutex _mutex; ///< Mutex for thread-safe access to repository operations + mutable std::mutex _mutex; ///< Mutex for thread-safe access to repository operations std::vector> _data_batches; ///< Container for data batch pointers (partitioned) }; -using shared_data_repository = idata_repository>; +using shared_data_repository = idata_repository>; using unique_data_repository = idata_repository>; } // namespace cucascade diff --git a/src/data/data_batch.cpp b/src/data/data_batch.cpp index 74586b3..18fd466 100644 --- a/src/data/data_batch.cpp +++ b/src/data/data_batch.cpp @@ -19,87 +19,48 @@ namespace cucascade { -// ========== data_batch implementation ========== - -data_batch::data_batch(uint64_t batch_id, std::unique_ptr data) - : _batch_id(batch_id), _data(std::move(data)) -{ - if (_data == nullptr) { throw std::runtime_error("data is null in data_batch constructor"); } -} +// ========== data_batch ========== uint64_t data_batch::get_batch_id() const { return _batch_id; } -void data_batch::subscribe() -{ - _subscriber_count.fetch_add(1, std::memory_order_relaxed); -} - -void data_batch::unsubscribe() -{ - size_t current = _subscriber_count.load(std::memory_order_relaxed); - while (true) { - if (current == 0) { - throw std::runtime_error("Cannot unsubscribe: subscriber count is already zero"); - } - if (_subscriber_count.compare_exchange_weak( - current, current - 1, std::memory_order_relaxed, std::memory_order_relaxed)) { - return; - } - } -} - -size_t data_batch::get_subscriber_count() const -{ - return _subscriber_count.load(std::memory_order_relaxed); -} - -// ========== data_batch private data accessors ========== - memory::Tier data_batch::get_current_tier() const { return _data->get_current_tier(); } idata_representation* data_batch::get_data() const { return _data.get(); } -memory::memory_space* data_batch::get_memory_space() const -{ - if (_data == nullptr) { return nullptr; } - return &_data->get_memory_space(); -} - -void data_batch::set_data(std::unique_ptr data) { _data = std::move(data); } - -// ========== Static transition methods ========== +memory::memory_space* data_batch::get_memory_space() const { return &(_data->get_memory_space()); } -std::shared_ptr data_batch::to_idle(read_only_data_batch&& accessor) +data_batch::data_batch(uint64_t batch_id, std::unique_ptr data) + : _batch_id(batch_id), _data(std::move(data)) { - auto ptr = accessor._batch; - { auto _ = std::move(accessor); } // destroy accessor, releasing shared lock - return ptr; } -std::shared_ptr data_batch::to_idle(mutable_data_batch&& accessor) +// ========== synchronized_data_batch ========== + +std::shared_ptr synchronized_data_batch::make( + uint64_t batch_id, std::unique_ptr data) { - auto ptr = accessor._batch; - { auto _ = std::move(accessor); } // destroy accessor, releasing exclusive lock - return ptr; + if (data == nullptr) { + throw std::runtime_error("data is null in synchronized_data_batch constructor"); + } + return std::shared_ptr( + new synchronized_data_batch(batch_id, std::move(data))); } -// ========== Non-static transition methods ========== - -read_only_data_batch data_batch::to_read_only() +read_only_data_batch synchronized_data_batch::get_read_only() { auto self = shared_from_this(); std::shared_lock lock(_rw_mutex); return read_only_data_batch(std::move(self), std::move(lock)); } -mutable_data_batch data_batch::to_mutable() +mutable_data_batch synchronized_data_batch::get_mutable() { auto self = shared_from_this(); std::unique_lock lock(_rw_mutex); return mutable_data_batch(std::move(self), std::move(lock)); } -std::optional data_batch::try_to_read_only() +std::optional synchronized_data_batch::try_get_read_only() { std::shared_lock lock(_rw_mutex, std::try_to_lock); if (!lock.owns_lock()) { return std::nullopt; } @@ -107,7 +68,7 @@ std::optional data_batch::try_to_read_only() return read_only_data_batch(std::move(self), std::move(lock)); } -std::optional data_batch::try_to_mutable() +std::optional synchronized_data_batch::try_get_mutable() { std::unique_lock lock(_rw_mutex, std::try_to_lock); if (!lock.owns_lock()) { return std::nullopt; } @@ -115,158 +76,129 @@ std::optional data_batch::try_to_mutable() return mutable_data_batch(std::move(self), std::move(lock)); } -// ========== Locked-to-locked static transitions ========== +void synchronized_data_batch::subscribe() +{ _subscriber_count.fetch_add(1, std::memory_order_relaxed); } -mutable_data_batch data_batch::readonly_to_mutable(read_only_data_batch&& accessor) +void synchronized_data_batch::unsubscribe() { - auto ptr = accessor._batch; - { - // destructor decrements _read_only_count, releases the shared lock and sets state to idle - auto _ = std::move(accessor); // move into temporary, destroyed at } - } - std::unique_lock lock(ptr->_rw_mutex); - return mutable_data_batch(std::move(ptr), std::move(lock)); + size_t current = _subscriber_count.load(std::memory_order_relaxed); + while (true) { + if (current == 0) { + throw std::runtime_error("Cannot unsubscribe: subscriber count is already zero"); + } + if (_subscriber_count.compare_exchange_weak( + current, current - 1, std::memory_order_relaxed, std::memory_order_relaxed)) { + return; + } + } } -read_only_data_batch data_batch::mutable_to_readonly(mutable_data_batch&& accessor) -{ - auto ptr = accessor._batch; - { - // destructor frees the exclusive lock and sets state to idle - auto _ = std::move(accessor); // move into temporary, destroyed at } - } - std::shared_lock lock(ptr->_rw_mutex); - return read_only_data_batch(std::move(ptr), std::move(lock)); -} +size_t synchronized_data_batch::get_subscriber_count() const +{ return _subscriber_count.load(std::memory_order_relaxed); } // ========== read_only_data_batch ========== -read_only_data_batch::read_only_data_batch(std::shared_ptr parent, - std::shared_lock lock) - : _batch(std::move(parent)), _lock(std::move(lock)) +read_only_data_batch::~read_only_data_batch() { - _batch->_read_only_count.fetch_add(1); - _batch->_state.store(batch_state::read_only); + // Decrement the reader count. If we were the last reader, transition to idle. + // NOTE: Do NOT call _lock.unlock() here — the _lock member destructor handles that. + // The destructor body runs before member destructors, so _owner is still valid here. + // After this function returns, _lock destructor fires first (declared after _owner, + // destroyed in reverse order), releasing the shared lock. Then _owner destructor fires. + if (_owner) { // the read_only_data_batch may have been moved. + size_t prev = _owner->_read_only_count.fetch_sub(1); + if (prev == 1) { _owner->_state.store(batch_state::idle); } + } } read_only_data_batch::read_only_data_batch(read_only_data_batch&& other) noexcept - : _batch(std::move(other._batch)), _lock(std::move(other._lock)) -{ - // other._batch is now nullptr — other's destructor will be a no-op. - // The read_only_count does NOT change: ownership was transferred, not a new reader created. -} - -read_only_data_batch::read_only_data_batch(const read_only_data_batch& other) - : _batch(other._batch) - , _lock(other._batch ? std::shared_lock(other._batch->_rw_mutex) - : std::shared_lock()) + : _owner(std::move(other._owner)), _lock(std::move(other._lock)) { - if (_batch) { _batch->_read_only_count.fetch_add(1); } + // other._owner is now nullptr — other's destructor will be a no-op. } read_only_data_batch& read_only_data_batch::operator=(read_only_data_batch&& other) noexcept { if (this != &other) { // Release the current state (same logic as destructor) - if (_batch) { - auto prev = _batch->_read_only_count.fetch_sub(1); - if (prev == 1) { _batch->_state.store(batch_state::idle); } + if (_owner) { + size_t prev = _owner->_read_only_count.fetch_sub(1); + if (prev == 1) { _owner->_state.store(batch_state::idle); } // _lock will be replaced below; its destructor fires when the old _lock is overwritten, // releasing the shared lock. We release _lock explicitly here so the sequence is: // decrement count -> set state (if last) -> release lock. _lock.unlock(); } - _batch = std::move(other._batch); - _lock = std::move(other._lock); - } - return *this; -} - -read_only_data_batch& read_only_data_batch::operator=(const read_only_data_batch& other) -{ - if (this != &other) { - if (_batch) { - auto prev = _batch->_read_only_count.fetch_sub(1); - if (prev == 1) { _batch->_state.store(batch_state::idle); } - _lock.unlock(); - } - _batch = other._batch; - if (_batch) { - _lock = std::shared_lock(_batch->_rw_mutex); - _batch->_read_only_count.fetch_add(1); - _batch->_state.store(batch_state::read_only); - } else { - _lock = std::shared_lock(); - } + _owner = std::move(other._owner); + _lock = std::move(other._lock); + // other._owner is now nullptr — other's destructor will be a no-op. } return *this; } -read_only_data_batch::~read_only_data_batch() -{ - if (_batch) { - // Decrement the reader count. If we were the last reader, transition to idle. - // NOTE: Do NOT call _lock.unlock() here — the _lock member destructor handles that. - // The destructor body runs before member destructors, so _batch is still valid here. - // After this function returns, _lock destructor fires first (declared after _batch, - // destroyed in reverse order), releasing the shared lock. Then _batch destructor fires. - auto prev = _batch->_read_only_count.fetch_sub(1); - if (prev == 1) { _batch->_state.store(batch_state::idle); } - } -} - -std::shared_ptr read_only_data_batch::clone(uint64_t new_batch_id, - rmm::cuda_stream_view stream) const +// read_only_data_batch& read_only_data_batch::operator=(const read_only_data_batch& other) +// { +// if (this != &other) { +// if (_batch) { +// auto prev = _batch->_read_only_count.fetch_sub(1); +// if (prev == 1) { _batch->_state.store(batch_state::idle); } +// _lock.unlock(); +// } +// _batch = other._batch; +// if (_batch) { +// _lock = std::shared_lock(_batch->_rw_mutex); +// _batch->_read_only_count.fetch_add(1); +// _batch->_state.store(batch_state::read_only); +// } else { +// _lock = std::shared_lock(); +// } +// } +// return *this; +// } + +read_only_data_batch::read_only_data_batch(std::shared_ptr owner, + std::shared_lock lock) + : _owner(std::move(owner)), _lock(std::move(lock)) { - if (_batch->_data == nullptr) { throw std::runtime_error("Cannot clone: data is null"); } - auto cloned_data = _batch->_data->clone(stream); - return std::make_shared(new_batch_id, std::move(cloned_data)); + _owner->_read_only_count.fetch_add(1); + _owner->_state.store(batch_state::read_only); } // ========== mutable_data_batch ========== -mutable_data_batch::mutable_data_batch(std::shared_ptr parent, - std::unique_lock lock) - : _batch(std::move(parent)), _lock(std::move(lock)) +mutable_data_batch::~mutable_data_batch() { - _batch->_state.store(batch_state::mutable_locked); + if (_owner) { // mutable_data_batch may have been moved + // Transition state to idle. The _lock member destructor handles releasing the exclusive lock. + _owner->_state.store(batch_state::idle); + } } mutable_data_batch::mutable_data_batch(mutable_data_batch&& other) noexcept - : _batch(std::move(other._batch)), _lock(std::move(other._lock)) + : _owner(std::move(other._owner)), _lock(std::move(other._lock)) { - // other._batch is now nullptr — other's destructor will be a no-op. + // other._owner is now nullptr — other's destructor will be a no-op. } mutable_data_batch& mutable_data_batch::operator=(mutable_data_batch&& other) noexcept { if (this != &other) { // Release the current state (same logic as destructor) - if (_batch) { - _batch->_state.store(batch_state::idle); + if (_owner) { + _owner->_state.store(batch_state::idle); // Release the exclusive lock explicitly before taking ownership of the new one. _lock.unlock(); } - _batch = std::move(other._batch); + _owner = std::move(other._owner); _lock = std::move(other._lock); + // other._owner is now nullptr — other's destructor will be a no-op. } return *this; } -mutable_data_batch::~mutable_data_batch() -{ - if (_batch) { - // Transition state to idle. The _lock member destructor handles releasing the exclusive lock. - _batch->_state.store(batch_state::idle); - } -} - -std::shared_ptr mutable_data_batch::clone(uint64_t new_batch_id, - rmm::cuda_stream_view stream) const -{ - if (_batch->_data == nullptr) { throw std::runtime_error("Cannot clone: data is null"); } - auto cloned_data = _batch->_data->clone(stream); - return std::make_shared(new_batch_id, std::move(cloned_data)); -} +mutable_data_batch::mutable_data_batch(std::shared_ptr owner, + std::unique_lock lock) + : _owner(std::move(owner)), _lock(std::move(lock)) +{ _owner->_state.store(batch_state::mutable_locked); } } // namespace cucascade diff --git a/test/data/test_data_repository.cpp b/test/data/test_data_repository.cpp index eb587f1..96b0e5a 100644 --- a/test/data/test_data_repository.cpp +++ b/test/data/test_data_repository.cpp @@ -48,13 +48,13 @@ TEST_CASE("shared_data_repository Add and Pull Single Batch", "[data_repository] shared_data_repository repository; auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = synchronized_data_batch::make(1, std::move(data)); repository.add_data_batch(batch); auto pulled_batch = repository.pop_next_data_batch(); REQUIRE(pulled_batch != nullptr); - REQUIRE(pulled_batch->get_batch_id() == 1); + REQUIRE(pulled_batch->get_read_only()->get_batch_id() == 1); auto empty = repository.pop_next_data_batch(); REQUIRE(empty == nullptr); @@ -953,8 +953,8 @@ TEST_CASE("shared_data_repository pop_next_data_batch returns read_only batch", { shared_data_repository repository; - auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(2, std::move(data)); + auto data = std::make_unique(memory::Tier::GPU, 1024); + auto batch = std::make_shared(2, std::move(data)); auto accessor = batch->to_read_only(); repository.add_data_batch(batch); @@ -967,8 +967,8 @@ TEST_CASE("shared_data_repository pop_next_data_batch returns mutable batch", "[ { shared_data_repository repository; - auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(3, std::move(data)); + auto data = std::make_unique(memory::Tier::GPU, 1024); + auto batch = std::make_shared(3, std::move(data)); auto accessor = batch->to_mutable(); repository.add_data_batch(batch); @@ -1016,10 +1016,10 @@ TEST_CASE("shared_data_repository pop_next_data_batch with partitions", "[data_r { shared_data_repository repository; - auto data1 = std::make_unique(memory::Tier::GPU, 1024); - auto batch1 = std::make_shared(1, std::move(data1)); - auto data2 = std::make_unique(memory::Tier::GPU, 1024); - auto batch2 = std::make_shared(2, std::move(data2)); + auto data1 = std::make_unique(memory::Tier::GPU, 1024); + auto batch1 = std::make_shared(1, std::move(data1)); + auto data2 = std::make_unique(memory::Tier::GPU, 1024); + auto batch2 = std::make_shared(2, std::move(data2)); auto accessor = batch1->to_read_only(); repository.add_data_batch(batch1, 0); From 0e9ed73ffb5fb78defdcbd8e09ef37bbdbd2d354 Mon Sep 17 00:00:00 2001 From: Dhruv Vats Date: Fri, 8 May 2026 17:58:19 +0530 Subject: [PATCH 2/7] Add state access methods to synchronized_data_batch --- include/cucascade/data/data_batch.hpp | 93 ++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 3 deletions(-) diff --git a/include/cucascade/data/data_batch.hpp b/include/cucascade/data/data_batch.hpp index 0868b7a..cfd7dc9 100644 --- a/include/cucascade/data/data_batch.hpp +++ b/include/cucascade/data/data_batch.hpp @@ -109,6 +109,24 @@ class data_batch { */ memory::memory_space* get_memory_space() const; + /** + * @brief Replace the data representation. + * @param data New data representation (takes ownership). + */ + void set_data(std::unique_ptr data) { _data = std::move(data); } + + /** + * @brief Convert the data representation in-place. + * + * Replaces the held data with a new representation produced by the converter + * registry. If the conversion involves the GPU tier, synchronizes the stream + * before the old representation is destroyed to prevent use-after-free. + * + * @tparam TargetRepresentation Target representation type. + * @param registry Converter registry for type-keyed dispatch. + * @param target_memory_space Target memory space for the new representation. + * @param stream CUDA stream for memory operations. + */ template void convert_to(representation_converter_registry& registry, const memory::memory_space* target_memory_space, @@ -151,7 +169,17 @@ class synchronized_data_batch : std::enable_shared_from_this data); /** - * @brief Transition from idle to read-only (shared lock) without consuming the caller's pointer. + * @brief Get the unique batch identifier. + * + * Lock-free -- safe to call without acquiring an accessor. + * + * @return The batch ID (immutable after construction). + */ + [[nodiscard]] uint64_t get_batch_id() const { return _batch.get_batch_id(); } + + /** + * @brief Transition from idle to read-only (shared lock) without consuming the caller's + * pointer. * * Blocks until the shared lock is acquired. * @@ -160,7 +188,8 @@ class synchronized_data_batch : std::enable_shared_from_this std::shared_ptr clone_to(representation_converter_registry& registry, uint64_t new_batch_id, @@ -212,6 +281,24 @@ class synchronized_data_batch : std::enable_shared_from_this clone(uint64_t new_batch_id, + rmm::cuda_stream_view stream) const + { + auto cloned_data = _batch._data->clone(stream); + return make(new_batch_id, std::move(cloned_data)); + } + private: synchronized_data_batch(uint64_t batch_id, std::unique_ptr data) : _batch(data_batch(batch_id, std::move(data))) From 5eadd0c4c37e70ffa52ba470b55dedbbf65f6c4a Mon Sep 17 00:00:00 2001 From: Dhruv Vats Date: Fri, 8 May 2026 18:08:15 +0530 Subject: [PATCH 3/7] rename data_batch to data_batch_core and synchronized_data_batch to data_batch --- include/cucascade/data/data_batch.hpp | 77 ++++---- include/cucascade/data/data_repository.hpp | 4 +- .../data/data_repository_manager.hpp | 16 +- src/data/data_batch.cpp | 38 ++-- src/data/data_repository.cpp | 10 +- test/data/test_data_batch.cpp | 169 ++++++++---------- test/data/test_data_repository.cpp | 76 ++++---- test/data/test_data_repository_manager.cpp | 42 ++--- 8 files changed, 205 insertions(+), 227 deletions(-) diff --git a/include/cucascade/data/data_batch.hpp b/include/cucascade/data/data_batch.hpp index cfd7dc9..2427e2b 100644 --- a/include/cucascade/data/data_batch.hpp +++ b/include/cucascade/data/data_batch.hpp @@ -50,7 +50,7 @@ enum class batch_state { idle, read_only, mutable_locked }; // Forward declarations -- required before data_batch because it be-friends them. class read_only_data_batch; class mutable_data_batch; -class synchronized_data_batch; +class data_batch; /** * @brief Core data batch type representing the "idle" (unlocked) state. @@ -66,19 +66,19 @@ class synchronized_data_batch; * @note Non-copyable and non-movable. The object itself never moves; only the * smart pointer to it is transferred between states. */ -class data_batch { - friend synchronized_data_batch; +class data_batch_core { + friend data_batch; friend read_only_data_batch; friend mutable_data_batch; public: - ~data_batch() = default; + ~data_batch_core() = default; // -- Deleted move/copy -- - data_batch(data_batch&&) = delete; - data_batch& operator=(data_batch&&) = delete; - data_batch(const data_batch&) = delete; - data_batch& operator=(const data_batch&) = delete; + data_batch_core(data_batch_core&&) = delete; + data_batch_core& operator=(data_batch_core&&) = delete; + data_batch_core(const data_batch_core&) = delete; + data_batch_core& operator=(const data_batch_core&) = delete; /** * @brief Get the unique batch identifier. @@ -150,7 +150,7 @@ class data_batch { } private: - data_batch(uint64_t batch_id, std::unique_ptr data); + data_batch_core(uint64_t batch_id, std::unique_ptr data); const uint64_t _batch_id; ///< Immutable batch identifier std::unique_ptr _data; ///< Owned data representation @@ -160,13 +160,13 @@ class data_batch { * @brief Synchronized data batch type allowing thread safe access to the code data batch * via RAII accessers. */ -class synchronized_data_batch : std::enable_shared_from_this { +class data_batch : std::enable_shared_from_this { friend read_only_data_batch; friend mutable_data_batch; public: - static std::shared_ptr make(uint64_t batch_id, - std::unique_ptr data); + static std::shared_ptr make(uint64_t batch_id, + std::unique_ptr data); /** * @brief Get the unique batch identifier. @@ -271,13 +271,13 @@ class synchronized_data_batch : std::enable_shared_from_this - std::shared_ptr clone_to(representation_converter_registry& registry, - uint64_t new_batch_id, - const memory::memory_space* target_memory_space, - rmm::cuda_stream_view stream) const + std::shared_ptr clone_to(representation_converter_registry& registry, + uint64_t new_batch_id, + const memory::memory_space* target_memory_space, + rmm::cuda_stream_view stream) const { auto new_representation = - registry.convert(_batch._data, target_memory_space, stream); + registry.convert(*(_batch._data), target_memory_space, stream); return make(new_batch_id, std::move(new_representation)); } @@ -292,20 +292,20 @@ class synchronized_data_batch : std::enable_shared_from_this clone(uint64_t new_batch_id, - rmm::cuda_stream_view stream) const + [[nodiscard]] std::shared_ptr clone(uint64_t new_batch_id, + rmm::cuda_stream_view stream) const { auto cloned_data = _batch._data->clone(stream); return make(new_batch_id, std::move(cloned_data)); } private: - synchronized_data_batch(uint64_t batch_id, std::unique_ptr data) - : _batch(data_batch(batch_id, std::move(data))) + data_batch(uint64_t batch_id, std::unique_ptr data) + : _batch(data_batch_core(batch_id, std::move(data))) { } - data_batch _batch; + data_batch_core _batch; mutable std::shared_mutex _rw_mutex; std::atomic _subscriber_count{0}; ///< Atomic subscriber interest count @@ -326,7 +326,7 @@ class synchronized_data_batch : std::enable_shared_from_this() const { return &(_owner->_batch); } - const data_batch& operator*() const { return _owner->_batch; } + const data_batch_core* operator->() const { return &(_owner->_batch); } + const data_batch_core& operator*() const { return _owner->_batch; } - static std::shared_ptr to_idle(read_only_data_batch&& accessor) + static std::shared_ptr to_idle(read_only_data_batch&& accessor) { - std::shared_ptr ptr = accessor._owner; + std::shared_ptr ptr = accessor._owner; { auto _ = std::move(accessor); } // destroy accessor, releasing shared lock @@ -367,14 +367,14 @@ class read_only_data_batch { * @param parent Shared pointer to the parent data_batch (moved in). * @param lock Shared lock already acquired on the parent's mutex. */ - read_only_data_batch(std::shared_ptr parent, + read_only_data_batch(std::shared_ptr parent, std::shared_lock lock); // INVARIANT: _batch must be declared before _lock -- destruction order is load-bearing. // When destroyed, _lock releases the shared lock first, then _batch drops the parent // reference. This prevents accessing a destroyed mutex. - std::shared_ptr _owner; ///< Parent lifetime (destroyed second) - std::shared_lock _lock; ///< Shared lock (destroyed first) + std::shared_ptr _owner; ///< Parent lifetime (destroyed second) + std::shared_lock _lock; ///< Shared lock (destroyed first) }; /** @@ -387,7 +387,7 @@ class read_only_data_batch { * Move-only. The exclusive lock is released when this object is destroyed or moved-from. */ class mutable_data_batch { - friend class synchronized_data_batch; + friend class data_batch; public: ~mutable_data_batch(); @@ -398,12 +398,12 @@ class mutable_data_batch { mutable_data_batch(const mutable_data_batch&) = delete; mutable_data_batch& operator=(const mutable_data_batch&) = delete; - data_batch* operator->() { return &(_owner->_batch); } - data_batch& operator*() { return _owner->_batch; } + data_batch_core* operator->() { return &(_owner->_batch); } + data_batch_core& operator*() { return _owner->_batch; } - static std::shared_ptr to_idle(mutable_data_batch&& accessor) + static std::shared_ptr to_idle(mutable_data_batch&& accessor) { - std::shared_ptr ptr = accessor._owner; + std::shared_ptr ptr = accessor._owner; { auto _ = std::move(accessor); } // destroy accessor, releasing shared lock @@ -417,14 +417,13 @@ class mutable_data_batch { * @param parent Shared pointer to the parent data_batch (moved in). * @param lock Exclusive lock already acquired on the parent's mutex. */ - mutable_data_batch(std::shared_ptr parent, - std::unique_lock lock); + mutable_data_batch(std::shared_ptr parent, std::unique_lock lock); // INVARIANT: _batch must be declared before _lock -- destruction order is load-bearing. // When destroyed, _lock releases the exclusive lock first, then _batch drops the parent // reference. This prevents accessing a destroyed mutex. - std::shared_ptr _owner; ///< Parent lifetime (destroyed second) - std::unique_lock _lock; ///< Exclusive lock (destroyed first) + std::shared_ptr _owner; ///< Parent lifetime (destroyed second) + std::unique_lock _lock; ///< Exclusive lock (destroyed first) }; } // namespace cucascade diff --git a/include/cucascade/data/data_repository.hpp b/include/cucascade/data/data_repository.hpp index 56ff768..4efb2a6 100644 --- a/include/cucascade/data/data_repository.hpp +++ b/include/cucascade/data/data_repository.hpp @@ -272,7 +272,7 @@ class idata_repository { _data_batches; ///< Container for data batch pointers (partitioned) }; -using shared_data_repository = idata_repository>; -using unique_data_repository = idata_repository>; +using shared_data_repository = idata_repository>; +using unique_data_repository = idata_repository>; } // namespace cucascade diff --git a/include/cucascade/data/data_repository_manager.hpp b/include/cucascade/data/data_repository_manager.hpp index 8f93730..0e9927e 100644 --- a/include/cucascade/data/data_repository_manager.hpp +++ b/include/cucascade/data/data_repository_manager.hpp @@ -43,9 +43,7 @@ struct operator_port_key { std::string port_id; bool operator==(const operator_port_key& other) const - { - return operator_id == other.operator_id && port_id == other.port_id; - } + { return operator_id == other.operator_id && port_id == other.port_id; } bool operator<(const operator_port_key& other) const { @@ -136,9 +134,7 @@ class data_repository_manager { * @note Thread-safe operation */ void add_data_batch(PtrType batch, std::vector> ops) - { - add_data_batch_impl(std::move(batch), ops); - } + { add_data_batch_impl(std::move(batch), ops); } /** * @brief Get direct access to a repository for advanced operations. @@ -155,9 +151,7 @@ class data_repository_manager { * safety */ std::unique_ptr& get_repository(size_t operator_id, std::string_view port_id) - { - return _repositories.at({operator_id, std::string(port_id)}); - } + { return _repositories.at({operator_id, std::string(port_id)}); } /** * @brief Generate a globally unique data batch identifier. @@ -273,7 +267,7 @@ class data_repository_manager { }; // Type aliases for common use cases -using shared_data_repository_manager = data_repository_manager>; -using unique_data_repository_manager = data_repository_manager>; +using shared_data_repository_manager = data_repository_manager>; +using unique_data_repository_manager = data_repository_manager>; } // namespace cucascade diff --git a/src/data/data_batch.cpp b/src/data/data_batch.cpp index 18fd466..f5024ba 100644 --- a/src/data/data_batch.cpp +++ b/src/data/data_batch.cpp @@ -21,46 +21,46 @@ namespace cucascade { // ========== data_batch ========== -uint64_t data_batch::get_batch_id() const { return _batch_id; } +uint64_t data_batch_core::get_batch_id() const { return _batch_id; } -memory::Tier data_batch::get_current_tier() const { return _data->get_current_tier(); } +memory::Tier data_batch_core::get_current_tier() const { return _data->get_current_tier(); } -idata_representation* data_batch::get_data() const { return _data.get(); } +idata_representation* data_batch_core::get_data() const { return _data.get(); } -memory::memory_space* data_batch::get_memory_space() const { return &(_data->get_memory_space()); } +memory::memory_space* data_batch_core::get_memory_space() const +{ return &(_data->get_memory_space()); } -data_batch::data_batch(uint64_t batch_id, std::unique_ptr data) +data_batch_core::data_batch_core(uint64_t batch_id, std::unique_ptr data) : _batch_id(batch_id), _data(std::move(data)) { } // ========== synchronized_data_batch ========== -std::shared_ptr synchronized_data_batch::make( - uint64_t batch_id, std::unique_ptr data) +std::shared_ptr data_batch::make(uint64_t batch_id, + std::unique_ptr data) { if (data == nullptr) { throw std::runtime_error("data is null in synchronized_data_batch constructor"); } - return std::shared_ptr( - new synchronized_data_batch(batch_id, std::move(data))); + return std::shared_ptr(new data_batch(batch_id, std::move(data))); } -read_only_data_batch synchronized_data_batch::get_read_only() +read_only_data_batch data_batch::get_read_only() { auto self = shared_from_this(); std::shared_lock lock(_rw_mutex); return read_only_data_batch(std::move(self), std::move(lock)); } -mutable_data_batch synchronized_data_batch::get_mutable() +mutable_data_batch data_batch::get_mutable() { auto self = shared_from_this(); std::unique_lock lock(_rw_mutex); return mutable_data_batch(std::move(self), std::move(lock)); } -std::optional synchronized_data_batch::try_get_read_only() +std::optional data_batch::try_get_read_only() { std::shared_lock lock(_rw_mutex, std::try_to_lock); if (!lock.owns_lock()) { return std::nullopt; } @@ -68,7 +68,7 @@ std::optional synchronized_data_batch::try_get_read_only() return read_only_data_batch(std::move(self), std::move(lock)); } -std::optional synchronized_data_batch::try_get_mutable() +std::optional data_batch::try_get_mutable() { std::unique_lock lock(_rw_mutex, std::try_to_lock); if (!lock.owns_lock()) { return std::nullopt; } @@ -76,10 +76,9 @@ std::optional synchronized_data_batch::try_get_mutable() return mutable_data_batch(std::move(self), std::move(lock)); } -void synchronized_data_batch::subscribe() -{ _subscriber_count.fetch_add(1, std::memory_order_relaxed); } +void data_batch::subscribe() { _subscriber_count.fetch_add(1, std::memory_order_relaxed); } -void synchronized_data_batch::unsubscribe() +void data_batch::unsubscribe() { size_t current = _subscriber_count.load(std::memory_order_relaxed); while (true) { @@ -93,9 +92,6 @@ void synchronized_data_batch::unsubscribe() } } -size_t synchronized_data_batch::get_subscriber_count() const -{ return _subscriber_count.load(std::memory_order_relaxed); } - // ========== read_only_data_batch ========== read_only_data_batch::~read_only_data_batch() @@ -156,7 +152,7 @@ read_only_data_batch& read_only_data_batch::operator=(read_only_data_batch&& oth // return *this; // } -read_only_data_batch::read_only_data_batch(std::shared_ptr owner, +read_only_data_batch::read_only_data_batch(std::shared_ptr owner, std::shared_lock lock) : _owner(std::move(owner)), _lock(std::move(lock)) { @@ -196,7 +192,7 @@ mutable_data_batch& mutable_data_batch::operator=(mutable_data_batch&& other) no return *this; } -mutable_data_batch::mutable_data_batch(std::shared_ptr owner, +mutable_data_batch::mutable_data_batch(std::shared_ptr owner, std::unique_lock lock) : _owner(std::move(owner)), _lock(std::move(lock)) { _owner->_state.store(batch_state::mutable_locked); } diff --git a/src/data/data_repository.cpp b/src/data/data_repository.cpp index 3853710..bfdebd5 100644 --- a/src/data/data_repository.cpp +++ b/src/data/data_repository.cpp @@ -25,8 +25,9 @@ template class idata_repository>; // Explicit specialization of get_data_batch_by_id for shared_ptr (copies the pointer) template <> -std::shared_ptr idata_repository>::get_data_batch_by_id( - uint64_t batch_id, size_t partition_idx) +std::shared_ptr +idata_repository>::get_data_batch_by_id(uint64_t batch_id, + size_t partition_idx) { std::unique_lock lock(_mutex); @@ -46,8 +47,9 @@ std::shared_ptr idata_repository>::get_d // Explicit specialization of get_data_batch_by_id for unique_ptr (not supported) template <> -std::unique_ptr idata_repository>::get_data_batch_by_id( - uint64_t /*batch_id*/, size_t /*partition_idx*/) +std::unique_ptr +idata_repository>::get_data_batch_by_id(uint64_t /*batch_id*/, + size_t /*partition_idx*/) { throw std::runtime_error( "get_data_batch_by_id is not supported for unique_ptr repositories. " diff --git a/test/data/test_data_batch.cpp b/test/data/test_data_batch.cpp index 83944ba..5b2414e 100644 --- a/test/data/test_data_batch.cpp +++ b/test/data/test_data_batch.cpp @@ -53,7 +53,7 @@ using cucascade::test::mock_data_representation; TEST_CASE("data_batch construction via shared_ptr", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 2048); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); REQUIRE(batch->get_batch_id() == 1); REQUIRE(batch->get_subscriber_count() == 0); @@ -62,7 +62,7 @@ TEST_CASE("data_batch construction via shared_ptr", "[data_batch]") TEST_CASE("data_batch construction via unique_ptr", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 2048); - auto batch = std::make_unique(1, std::move(data)); + auto batch = std::make_unique(1, std::move(data)); REQUIRE(batch->get_batch_id() == 1); REQUIRE(batch->get_subscriber_count() == 0); @@ -74,10 +74,10 @@ TEST_CASE("data_batch construction via unique_ptr", "[data_batch]") TEST_CASE("data_batch is non-copyable and non-movable", "[data_batch]") { - static_assert(!std::is_copy_constructible_v); - static_assert(!std::is_move_constructible_v); - static_assert(!std::is_copy_assignable_v); - static_assert(!std::is_move_assignable_v); + static_assert(!std::is_copy_constructible_v); + static_assert(!std::is_move_constructible_v); + static_assert(!std::is_copy_assignable_v); + static_assert(!std::is_move_assignable_v); } // ============================================================================= @@ -87,14 +87,14 @@ TEST_CASE("data_batch is non-copyable and non-movable", "[data_batch]") TEST_CASE("data_batch get_batch_id is lock-free via shared_ptr", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(99, std::move(data)); + auto batch = std::make_shared(99, std::move(data)); // get_batch_id works without acquiring any lock REQUIRE(batch->get_batch_id() == 99); // Also works through the mutable accessor auto data2 = std::make_unique(memory::Tier::GPU, 1024); - auto batch2 = std::make_shared(99, std::move(data2)); + auto batch2 = std::make_shared(99, std::move(data2)); auto rw = batch2->to_mutable(); REQUIRE(rw.get_batch_id() == 99); } @@ -106,7 +106,7 @@ TEST_CASE("data_batch get_batch_id is lock-free via shared_ptr", "[data_batch]") TEST_CASE("data_batch to_read_only acquires shared access", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto ro = batch->to_read_only(); REQUIRE(ro.get_batch_id() == 1); @@ -116,7 +116,7 @@ TEST_CASE("data_batch to_read_only acquires shared access", "[data_batch]") TEST_CASE("data_batch multiple concurrent read_only via shared_ptr copies", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto ro1 = batch->to_read_only(); auto ro2 = batch->to_read_only(); @@ -134,7 +134,7 @@ TEST_CASE("data_batch multiple concurrent read_only via shared_ptr copies", "[da TEST_CASE("data_batch try_to_read_only succeeds when unlocked", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto result = batch->try_to_read_only(); REQUIRE(result.has_value()); @@ -144,7 +144,7 @@ TEST_CASE("data_batch try_to_read_only succeeds when unlocked", "[data_batch]") TEST_CASE("data_batch try_to_read_only fails when mutable lock held", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto rw = batch->to_mutable(); @@ -160,7 +160,7 @@ TEST_CASE("data_batch try_to_read_only fails when mutable lock held", "[data_bat TEST_CASE("data_batch try_to_mutable succeeds when unlocked", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto result = batch->try_to_mutable(); REQUIRE(result.has_value()); @@ -170,7 +170,7 @@ TEST_CASE("data_batch try_to_mutable succeeds when unlocked", "[data_batch]") TEST_CASE("data_batch try_to_mutable fails when readonly lock held", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto ro = batch->to_read_only(); @@ -186,7 +186,7 @@ TEST_CASE("data_batch try_to_mutable fails when readonly lock held", "[data_batc TEST_CASE("data_batch try_to_mutable fails when mutable lock held", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto rw = batch->to_mutable(); @@ -206,7 +206,7 @@ TEST_CASE("data_batch try_to_mutable fails when mutable lock held", "[data_batch TEST_CASE("data_batch to_mutable acquires exclusive access", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto rw = batch->to_mutable(); REQUIRE(rw.get_batch_id() == 1); @@ -215,7 +215,7 @@ TEST_CASE("data_batch to_mutable acquires exclusive access", "[data_batch]") TEST_CASE("data_batch mutable blocks until readonly released", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); // Acquire read-only on a heap-allocated accessor so we can control its lifetime auto ro = std::make_unique(batch->to_read_only()); @@ -242,7 +242,7 @@ TEST_CASE("data_batch mutable blocks until readonly released", "[data_batch]") TEST_CASE("data_batch mutable to readonly through idle", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto rw = batch->to_mutable(); auto idle = data_batch::to_idle(std::move(rw)); @@ -253,7 +253,7 @@ TEST_CASE("data_batch mutable to readonly through idle", "[data_batch]") TEST_CASE("data_batch readonly to mutable through idle", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto ro = batch->to_read_only(); auto idle = data_batch::to_idle(std::move(ro)); @@ -275,7 +275,7 @@ TEST_CASE("data_batch destruction order safety", "[data_batch]") // If the order were reversed, the mutex would be destroyed before the lock // releases, causing undefined behavior detectable by TSan/ASan. auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); // Create accessor -- this is now the ONLY shared_ptr holding the batch alive. auto ro = batch->to_read_only(); @@ -292,7 +292,7 @@ TEST_CASE("data_batch destruction order safety", "[data_batch]") TEST_CASE("data_batch subscribe always succeeds", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); REQUIRE(batch->get_subscriber_count() == 0); batch->subscribe(); @@ -304,7 +304,7 @@ TEST_CASE("data_batch subscribe always succeeds", "[data_batch]") TEST_CASE("data_batch unsubscribe decrements count", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); batch->subscribe(); batch->subscribe(); @@ -319,7 +319,7 @@ TEST_CASE("data_batch unsubscribe decrements count", "[data_batch]") TEST_CASE("data_batch unsubscribe throws at zero", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); REQUIRE_THROWS_AS(batch->unsubscribe(), std::runtime_error); REQUIRE(batch->get_subscriber_count() == 0); @@ -328,7 +328,7 @@ TEST_CASE("data_batch unsubscribe throws at zero", "[data_batch]") TEST_CASE("data_batch subscriber count thread safety", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); constexpr int num_threads = 10; constexpr int subs_per_thread = 100; @@ -370,7 +370,7 @@ TEST_CASE("data_batch subscriber count thread safety", "[data_batch]") TEST_CASE("data_batch set_data via mutable accessor", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto rw = batch->to_mutable(); REQUIRE(rw.get_current_tier() == memory::Tier::GPU); @@ -389,19 +389,19 @@ TEST_CASE("data_batch accessor get_current_tier", "[data_batch]") { { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto ro = batch->to_read_only(); REQUIRE(ro.get_current_tier() == memory::Tier::GPU); } { auto data = std::make_unique(memory::Tier::HOST, 1024); - auto batch = std::make_shared(2, std::move(data)); + auto batch = std::make_shared(2, std::move(data)); auto ro = batch->to_read_only(); REQUIRE(ro.get_current_tier() == memory::Tier::HOST); } { auto data = std::make_unique(memory::Tier::DISK, 1024); - auto batch = std::make_shared(3, std::move(data)); + auto batch = std::make_shared(3, std::move(data)); auto ro = batch->to_read_only(); REQUIRE(ro.get_current_tier() == memory::Tier::DISK); } @@ -429,7 +429,7 @@ TEST_CASE("data_batch unique IDs", "[data_batch]") TEST_CASE("data_batch thread-safe concurrent readonly", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); constexpr int num_threads = 10; constexpr int reads_per_thread = 100; @@ -452,7 +452,7 @@ TEST_CASE("data_batch thread-safe concurrent readonly", "[data_batch]") TEST_CASE("data_batch thread-safe mutable access serialized", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); constexpr int num_threads = 10; std::atomic concurrent_writers{0}; @@ -485,7 +485,7 @@ TEST_CASE("data_batch thread-safe mutable access serialized", "[data_batch]") TEST_CASE("data_batch clone creates independent copy", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 2048); - auto batch = std::make_shared(42, std::move(data)); + auto batch = std::make_shared(42, std::move(data)); auto ro = batch->to_read_only(); auto cloned = ro.clone(100, rmm::cuda_stream_view{}); @@ -503,7 +503,7 @@ TEST_CASE("data_batch clone creates independent copy", "[data_batch]") TEST_CASE("data_batch clone with different batch IDs", "[data_batch]") { auto data = std::make_unique(memory::Tier::HOST, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto ro = batch->to_read_only(); @@ -522,7 +522,7 @@ TEST_CASE("data_batch clone preserves tier information", "[data_batch]") SECTION("GPU tier") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto ro = batch->to_read_only(); auto cloned = ro.clone(2, rmm::cuda_stream_view{}); auto ro_cl = cloned->to_read_only(); @@ -531,7 +531,7 @@ TEST_CASE("data_batch clone preserves tier information", "[data_batch]") SECTION("HOST tier") { auto data = std::make_unique(memory::Tier::HOST, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto ro = batch->to_read_only(); auto cloned = ro.clone(2, rmm::cuda_stream_view{}); auto ro_cl = cloned->to_read_only(); @@ -540,7 +540,7 @@ TEST_CASE("data_batch clone preserves tier information", "[data_batch]") SECTION("DISK tier") { auto data = std::make_unique(memory::Tier::DISK, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto ro = batch->to_read_only(); auto cloned = ro.clone(2, rmm::cuda_stream_view{}); auto ro_cl = cloned->to_read_only(); @@ -563,7 +563,7 @@ TEST_CASE("data_batch clone with real GPU data verifies data integrity", "[data_ auto gpu_repr = std::make_unique( std::make_unique(std::move(table)), *gpu_space); - auto batch = std::make_shared(1, std::move(gpu_repr)); + auto batch = std::make_shared(1, std::move(gpu_repr)); auto ro = batch->to_read_only(); auto cloned = ro.clone(2, stream.view()); @@ -594,7 +594,7 @@ TEST_CASE("data_batch clone creates independent memory copies", "[data_batch][gp auto table = create_simple_cudf_table(50, 2, gpu_space->get_default_allocator(), stream.view()); auto gpu_repr = std::make_unique( std::make_unique(std::move(table)), *gpu_space); - auto batch = std::make_shared(1, std::move(gpu_repr)); + auto batch = std::make_shared(1, std::move(gpu_repr)); auto ro = batch->to_read_only(); auto cloned = ro.clone(2, stream.view()); @@ -611,7 +611,6 @@ TEST_CASE("data_batch clone creates independent memory copies", "[data_batch][gp } } - TEST_CASE("data_batch multiple clones are all independent", "[data_batch][gpu]") { auto gpu_space = make_mock_memory_space(memory::Tier::GPU, 0); @@ -620,7 +619,7 @@ TEST_CASE("data_batch multiple clones are all independent", "[data_batch][gpu]") auto table = create_simple_cudf_table(30, 2, gpu_space->get_default_allocator(), stream.view()); auto gpu_repr = std::make_unique( std::make_unique(std::move(table)), *gpu_space); - auto batch = std::make_shared(1, std::move(gpu_repr)); + auto batch = std::make_shared(1, std::move(gpu_repr)); // Clone 3 times from the same read_only accessor (clone does not consume the accessor) auto ro = batch->to_read_only(); @@ -658,7 +657,7 @@ TEST_CASE("data_batch clone with empty table", "[data_batch][gpu]") auto table = create_simple_cudf_table(0, 2, gpu_space->get_default_allocator(), stream.view()); auto gpu_repr = std::make_unique( std::make_unique(std::move(table)), *gpu_space); - auto batch = std::make_shared(1, std::move(gpu_repr)); + auto batch = std::make_shared(1, std::move(gpu_repr)); auto ro = batch->to_read_only(); auto cloned = ro.clone(2, stream.view()); @@ -680,7 +679,7 @@ TEST_CASE("data_batch clone with large table", "[data_batch][gpu]") create_simple_cudf_table(10000, 2, gpu_space->get_default_allocator(), stream.view()); auto gpu_repr = std::make_unique( std::make_unique(std::move(table)), *gpu_space); - auto batch = std::make_shared(1, std::move(gpu_repr)); + auto batch = std::make_shared(1, std::move(gpu_repr)); auto ro = batch->to_read_only(); auto cloned = ro.clone(2, stream.view()); @@ -712,14 +711,14 @@ TEST_CASE("data_batch clone with large table", "[data_batch][gpu]") TEST_CASE("data_batch initial state is idle", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); REQUIRE(batch->get_state() == batch_state::idle); } TEST_CASE("data_batch state transitions", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); SECTION("idle -> read_only -> idle") { @@ -761,7 +760,7 @@ TEST_CASE("data_batch state transitions", "[data_batch]") TEST_CASE("data_batch non-static to_read_only does not consume caller pointer", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto accessor = batch->to_read_only(); REQUIRE(batch != nullptr); @@ -773,7 +772,7 @@ TEST_CASE("data_batch non-static to_read_only does not consume caller pointer", TEST_CASE("data_batch non-static to_mutable does not consume caller pointer", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto accessor = batch->to_mutable(); REQUIRE(batch != nullptr); @@ -785,7 +784,7 @@ TEST_CASE("data_batch non-static to_mutable does not consume caller pointer", "[ TEST_CASE("data_batch non-static try_to_read_only", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto result = batch->try_to_read_only(); REQUIRE(result.has_value()); @@ -796,7 +795,7 @@ TEST_CASE("data_batch non-static try_to_read_only", "[data_batch]") TEST_CASE("data_batch non-static try_to_mutable", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto result = batch->try_to_mutable(); REQUIRE(result.has_value()); @@ -807,14 +806,13 @@ TEST_CASE("data_batch non-static try_to_mutable", "[data_batch]") TEST_CASE("data_batch non-static try_to_mutable fails when read-locked", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto ro = batch->to_read_only(); auto result = batch->try_to_mutable(); REQUIRE_FALSE(result.has_value()); } - // ============================================================================= // convert_to stream synchronization tests // ============================================================================= @@ -826,9 +824,7 @@ struct conversion_sync_observer { bool synced_before_destroy = false; conversion_sync_observer() - { - CUCASCADE_CUDA_TRY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming)); - } + { CUCASCADE_CUDA_TRY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming)); } ~conversion_sync_observer() { cudaEventDestroy(event); } conversion_sync_observer(const conversion_sync_observer&) = delete; @@ -850,18 +846,14 @@ class observed_gpu_representation : private cucascade::test::mock_memory_space_h } ~observed_gpu_representation() override - { - _observer.synced_before_destroy = (cudaEventQuery(_observer.event) == cudaSuccess); - } + { _observer.synced_before_destroy = (cudaEventQuery(_observer.event) == cudaSuccess); } void const* data() const { return _buf.data(); } std::size_t get_size_in_bytes() const override { return _buf.size(); } std::size_t get_uncompressed_data_size_in_bytes() const override { return _buf.size(); } std::unique_ptr clone( [[maybe_unused]] rmm::cuda_stream_view stream) override - { - return nullptr; - } + { return nullptr; } private: rmm::device_buffer _buf; @@ -904,7 +896,7 @@ TEST_CASE("convert_to synchronizes stream before destroying GPU source", "[data_ }); auto gpu_data = std::make_unique(std::move(gpu_buf), observer); - auto batch = std::make_shared(1, std::move(gpu_data)); + auto batch = std::make_shared(1, std::move(gpu_data)); { auto mut = batch->to_mutable(); mut.convert_to(registry, host_space.get(), stream.view()); @@ -948,18 +940,14 @@ class observed_host_representation : private cucascade::test::mock_memory_space_ } ~observed_host_representation() override - { - _observer.synced_before_destroy = (cudaEventQuery(_observer.event) == cudaSuccess); - } + { _observer.synced_before_destroy = (cudaEventQuery(_observer.event) == cudaSuccess); } void const* data() const { return _pinned_ptr; } std::size_t get_size_in_bytes() const override { return _size; } std::size_t get_uncompressed_data_size_in_bytes() const override { return _size; } std::unique_ptr clone( [[maybe_unused]] rmm::cuda_stream_view stream) override - { - return nullptr; - } + { return nullptr; } private: void* _pinned_ptr; @@ -1001,7 +989,7 @@ TEST_CASE("convert_to synchronizes stream before destroying HOST source when tar auto host_data = std::make_unique(pinned_host, buf_size, observer); auto gpu_space = make_mock_memory_space(memory::Tier::GPU, 0); - auto batch = std::make_shared(1, std::move(host_data)); + auto batch = std::make_shared(1, std::move(host_data)); { auto mut = batch->to_mutable(); mut.convert_to(registry, gpu_space.get(), stream.view()); @@ -1018,9 +1006,7 @@ TEST_CASE("convert_to synchronizes stream before destroying HOST source when tar // Host callback that blocks the CUDA stream for a fixed duration, used to // create a deterministic window during which stream.synchronize() is blocked. static void CUDART_CB stream_delay_callback(void* /*userData*/) -{ - std::this_thread::sleep_for(std::chrono::milliseconds(50)); -} +{ std::this_thread::sleep_for(std::chrono::milliseconds(50)); } TEST_CASE("mutable_data_batch holds exclusive lock during convert_to stream sync", "[data_batch][convert_to]") @@ -1061,7 +1047,7 @@ TEST_CASE("mutable_data_batch holds exclusive lock during convert_to stream sync }); auto gpu_data = std::make_unique(std::move(gpu_buf), observer); - auto batch = std::make_shared(1, std::move(gpu_data)); + auto batch = std::make_shared(1, std::move(gpu_data)); std::thread convert_thread([&]() { auto mut = batch->to_mutable(); @@ -1108,7 +1094,7 @@ TEST_CASE("mutable_data_batch holds exclusive lock during convert_to stream sync TEST_CASE("data_batch readonly_to_mutable", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto ro = batch->to_read_only(); auto mut = data_batch::readonly_to_mutable(std::move(ro)); @@ -1121,7 +1107,7 @@ TEST_CASE("data_batch readonly_to_mutable", "[data_batch]") TEST_CASE("data_batch mutable_to_readonly", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto mut = batch->to_mutable(); auto ro = data_batch::mutable_to_readonly(std::move(mut)); @@ -1134,7 +1120,7 @@ TEST_CASE("data_batch mutable_to_readonly", "[data_batch]") TEST_CASE("data_batch full cycle: idle -> ro -> mutable -> ro -> idle", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto ro1 = batch->to_read_only(); auto mut = data_batch::readonly_to_mutable(std::move(ro1)); @@ -1152,7 +1138,7 @@ TEST_CASE("data_batch full cycle: idle -> ro -> mutable -> ro -> idle", "[data_b TEST_CASE("data_batch read_only_count tracks concurrent readers", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); REQUIRE(batch->get_read_only_count() == 0); @@ -1190,7 +1176,7 @@ TEST_CASE("data_batch read_only_count tracks concurrent readers", "[data_batch]" TEST_CASE("data_batch destructor transitions state to idle for read_only", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); { auto ro = batch->to_read_only(); @@ -1203,7 +1189,7 @@ TEST_CASE("data_batch destructor transitions state to idle for read_only", "[dat TEST_CASE("data_batch destructor transitions state to idle for mutable", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); { auto mut = batch->to_mutable(); @@ -1216,7 +1202,7 @@ TEST_CASE("data_batch destructor transitions state to idle for mutable", "[data_ TEST_CASE("data_batch concurrent lifecycle: readers then mutable then readers", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); // Track event ordering std::vector events; @@ -1306,7 +1292,8 @@ TEST_CASE("data_batch concurrent lifecycle: readers then mutable then readers", t1.join(); t2.join(); - // Validate ordering: readers released before mutable acquired, mutable released before new readers + // Validate ordering: readers released before mutable acquired, mutable released before new + // readers { std::lock_guard guard(events_mutex); auto find_idx = [&](const std::string& prefix) -> size_t { @@ -1334,7 +1321,7 @@ TEST_CASE("data_batch concurrent lifecycle: readers then mutable then readers", TEST_CASE("data_batch move does not change read_only_count", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto ro1 = batch->to_read_only(); REQUIRE(batch->get_read_only_count() == 1); @@ -1360,7 +1347,7 @@ TEST_CASE("read_only_data_batch is copyable", "[data_batch]") TEST_CASE("read_only_data_batch copy constructor acquires new shared lock", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto ro1 = batch->to_read_only(); REQUIRE(batch->get_read_only_count() == 1); @@ -1377,7 +1364,7 @@ TEST_CASE("read_only_data_batch copy constructor acquires new shared lock", "[da TEST_CASE("read_only_data_batch copy destructor decrements count", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto ro1 = batch->to_read_only(); REQUIRE(batch->get_read_only_count() == 1); @@ -1393,7 +1380,7 @@ TEST_CASE("read_only_data_batch copy destructor decrements count", "[data_batch] TEST_CASE("read_only_data_batch copy outlives original", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); std::optional copy; { @@ -1414,7 +1401,7 @@ TEST_CASE("read_only_data_batch copy outlives original", "[data_batch]") TEST_CASE("read_only_data_batch multiple copies all independent", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto ro1 = batch->to_read_only(); auto ro2 = ro1; // NOLINT(performance-unnecessary-copy-initialization) @@ -1439,10 +1426,10 @@ TEST_CASE("read_only_data_batch multiple copies all independent", "[data_batch]" TEST_CASE("read_only_data_batch copy assignment replaces existing lock", "[data_batch]") { auto data1 = std::make_unique(memory::Tier::GPU, 1024); - auto batch1 = std::make_shared(1, std::move(data1)); + auto batch1 = std::make_shared(1, std::move(data1)); auto data2 = std::make_unique(memory::Tier::HOST, 2048); - auto batch2 = std::make_shared(2, std::move(data2)); + auto batch2 = std::make_shared(2, std::move(data2)); auto ro1 = batch1->to_read_only(); auto ro2 = batch2->to_read_only(); @@ -1460,7 +1447,7 @@ TEST_CASE("read_only_data_batch copy assignment replaces existing lock", "[data_ TEST_CASE("read_only_data_batch copy self-assignment is safe", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto ro = batch->to_read_only(); REQUIRE(batch->get_read_only_count() == 1); @@ -1474,7 +1461,7 @@ TEST_CASE("read_only_data_batch copy self-assignment is safe", "[data_batch]") TEST_CASE("read_only_data_batch copy blocks mutable access", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto ro = batch->to_read_only(); auto copy = ro; // NOLINT(performance-unnecessary-copy-initialization) @@ -1498,7 +1485,7 @@ TEST_CASE("read_only_data_batch copy blocks mutable access", "[data_batch]") TEST_CASE("read_only_data_batch last copy destruction transitions to idle", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); { auto ro1 = batch->to_read_only(); @@ -1514,11 +1501,11 @@ TEST_CASE("read_only_data_batch last copy destruction transitions to idle", "[da TEST_CASE("read_only_data_batch concurrent copies thread safety", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); auto ro = batch->to_read_only(); - constexpr int num_threads = 10; + constexpr int num_threads = 10; constexpr int copies_per_thread = 50; std::vector threads; @@ -1542,7 +1529,7 @@ TEST_CASE("read_only_data_batch concurrent copies thread safety", "[data_batch]" TEST_CASE("read_only_data_batch copy then mutable after all copies released", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); { auto ro1 = batch->to_read_only(); diff --git a/test/data/test_data_repository.cpp b/test/data/test_data_repository.cpp index 96b0e5a..165d75e 100644 --- a/test/data/test_data_repository.cpp +++ b/test/data/test_data_repository.cpp @@ -48,7 +48,7 @@ TEST_CASE("shared_data_repository Add and Pull Single Batch", "[data_repository] shared_data_repository repository; auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = synchronized_data_batch::make(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); repository.add_data_batch(batch); @@ -66,7 +66,7 @@ TEST_CASE("shared_data_repository FIFO Order", "[data_repository]") for (uint64_t i = 1; i <= 5; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(i, std::move(data)); + auto batch = std::make_shared(i, std::move(data)); repository.add_data_batch(batch); } @@ -87,7 +87,7 @@ TEST_CASE("shared_data_repository Same Batch Multiple Repositories", "[data_repo shared_data_repository repo3; auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(42, std::move(data)); + auto batch = std::make_shared(42, std::move(data)); repo1.add_data_batch(batch); repo2.add_data_batch(batch); @@ -133,7 +133,7 @@ TEST_CASE("shared_data_repository Thread-Safe Adding", "[data_repository]") for (int j = 0; j < batches_per_thread; ++j) { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = i * batches_per_thread + j; - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = std::make_shared(batch_id, std::move(data)); repository.add_data_batch(batch); } }); @@ -161,7 +161,7 @@ TEST_CASE("shared_data_repository Thread-Safe Pulling", "[data_repository]") for (int i = 0; i < num_batches; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(i, std::move(data)); + auto batch = std::make_shared(i, std::move(data)); repository.add_data_batch(batch); } @@ -204,7 +204,7 @@ TEST_CASE("shared_data_repository Thread-Safe Pulling with Multiple Partitions", for (int i = 0; i < num_batches; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(i, std::move(data)); + auto batch = std::make_shared(i, std::move(data)); repository.add_data_batch(batch, i % num_partitions); } @@ -260,7 +260,7 @@ TEST_CASE("unique_data_repository Add and Pull Single Batch", "[data_repository] unique_data_repository repository; auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_unique(1, std::move(data)); + auto batch = std::make_unique(1, std::move(data)); repository.add_data_batch(std::move(batch)); @@ -278,7 +278,7 @@ TEST_CASE("unique_data_repository FIFO Order", "[data_repository]") for (uint64_t i = 1; i <= 5; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_unique(i, std::move(data)); + auto batch = std::make_unique(i, std::move(data)); repository.add_data_batch(std::move(batch)); } @@ -300,7 +300,7 @@ TEST_CASE("unique_data_repository Large Number of Batches", "[data_repository]") for (int i = 0; i < num_batches; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_unique(i, std::move(data)); + auto batch = std::make_unique(i, std::move(data)); repository.add_data_batch(std::move(batch)); } @@ -321,7 +321,7 @@ TEST_CASE("unique_data_repository Interleaved Add and Pull", "[data_repository]" for (int cycle = 0; cycle < 50; ++cycle) { for (int i = 0; i < 3; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_unique(cycle * 3 + i, std::move(data)); + auto batch = std::make_unique(cycle * 3 + i, std::move(data)); repository.add_data_batch(std::move(batch)); } @@ -354,7 +354,7 @@ TEST_CASE("unique_data_repository Thread-Safe Adding", "[data_repository]") for (int j = 0; j < batches_per_thread; ++j) { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = i * batches_per_thread + j; - auto batch = std::make_unique(batch_id, std::move(data)); + auto batch = std::make_unique(batch_id, std::move(data)); repository.add_data_batch(std::move(batch)); } }); @@ -382,7 +382,7 @@ TEST_CASE("unique_data_repository Thread-Safe Pulling", "[data_repository]") for (int i = 0; i < num_batches; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_unique(i, std::move(data)); + auto batch = std::make_unique(i, std::move(data)); repository.add_data_batch(std::move(batch)); } @@ -431,7 +431,7 @@ TEST_CASE("unique_data_repository Concurrent Add and Pull", "[data_repository]") for (int j = 0; j < batches_per_thread; ++j) { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = i * batches_per_thread + j; - auto batch = std::make_unique(batch_id, std::move(data)); + auto batch = std::make_unique(batch_id, std::move(data)); repository.add_data_batch(std::move(batch)); std::this_thread::sleep_for(std::chrono::microseconds(10)); @@ -477,7 +477,7 @@ TEST_CASE("unique_data_repository High Contention", "[data_repository]") for (int j = 0; j < operations_per_thread; ++j) { auto data = std::make_unique(memory::Tier::GPU, 512); uint64_t batch_id = i * operations_per_thread + j; - auto batch = std::make_unique(batch_id, std::move(data)); + auto batch = std::make_unique(batch_id, std::move(data)); repository.add_data_batch(std::move(batch)); ++total_added; @@ -519,14 +519,14 @@ TEST_CASE("shared_data_repository size After Adding", "[data_repository]") REQUIRE(repository.size() == 0); auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); repository.add_data_batch(batch); REQUIRE(repository.size() == 1); for (int i = 2; i <= 5; ++i) { auto data2 = std::make_unique(memory::Tier::GPU, 1024); - auto batch2 = std::make_shared(i, std::move(data2)); + auto batch2 = std::make_shared(i, std::move(data2)); repository.add_data_batch(batch2); } @@ -539,7 +539,7 @@ TEST_CASE("shared_data_repository size After Pulling", "[data_repository]") for (int i = 1; i <= 5; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(i, std::move(data)); + auto batch = std::make_shared(i, std::move(data)); repository.add_data_batch(batch); } @@ -565,7 +565,7 @@ TEST_CASE("shared_data_repository size Interleaved Operations", "[data_repositor for (int cycle = 0; cycle < 10; ++cycle) { for (int i = 0; i < 3; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(cycle * 3 + i, std::move(data)); + auto batch = std::make_shared(cycle * 3 + i, std::move(data)); repository.add_data_batch(batch); } @@ -598,14 +598,14 @@ TEST_CASE("unique_data_repository size After Adding", "[data_repository]") REQUIRE(repository.size() == 0); auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_unique(1, std::move(data)); + auto batch = std::make_unique(1, std::move(data)); repository.add_data_batch(std::move(batch)); REQUIRE(repository.size() == 1); for (int i = 2; i <= 5; ++i) { auto data2 = std::make_unique(memory::Tier::GPU, 1024); - auto batch2 = std::make_unique(i, std::move(data2)); + auto batch2 = std::make_unique(i, std::move(data2)); repository.add_data_batch(std::move(batch2)); } @@ -618,7 +618,7 @@ TEST_CASE("unique_data_repository size After Pulling", "[data_repository]") for (int i = 1; i <= 5; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_unique(i, std::move(data)); + auto batch = std::make_unique(i, std::move(data)); repository.add_data_batch(std::move(batch)); } @@ -644,7 +644,7 @@ TEST_CASE("unique_data_repository size Interleaved Operations", "[data_repositor for (int cycle = 0; cycle < 10; ++cycle) { for (int i = 0; i < 3; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_unique(cycle * 3 + i, std::move(data)); + auto batch = std::make_unique(cycle * 3 + i, std::move(data)); repository.add_data_batch(std::move(batch)); } @@ -679,7 +679,7 @@ TEST_CASE("shared_data_repository size Thread-Safe", "[data_repository]") for (int j = 0; j < batches_per_thread; ++j) { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = i * batches_per_thread + j; - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = std::make_shared(batch_id, std::move(data)); repository.add_data_batch(batch); if (repository.size() > 0) { ++availability_check_count; } @@ -710,7 +710,7 @@ TEST_CASE("unique_data_repository size Thread-Safe", "[data_repository]") for (int j = 0; j < batches_per_thread; ++j) { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = i * batches_per_thread + j; - auto batch = std::make_unique(batch_id, std::move(data)); + auto batch = std::make_unique(batch_id, std::move(data)); repository.add_data_batch(std::move(batch)); if (repository.size() > 0) { ++availability_check_count; } @@ -741,7 +741,7 @@ TEST_CASE("shared_data_repository size Concurrent Operations", "[data_repository for (int j = 0; j < operations; ++j) { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = i * operations + j; - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = std::make_shared(batch_id, std::move(data)); repository.add_data_batch(batch); std::this_thread::sleep_for(std::chrono::microseconds(10)); } @@ -769,12 +769,12 @@ TEST_CASE("shared_data_repository size Concurrent Operations", "[data_repository REQUIRE(repository.size() == 0); } -std::vector> create_test_batches(std::vector batch_ids) +std::vector> create_test_batches(std::vector batch_ids) { - std::vector> batches; + std::vector> batches; for (auto batch_id : batch_ids) { auto data = std::make_unique(memory::Tier::GPU, 1024); - batches.emplace_back(std::make_shared(batch_id, std::move(data))); + batches.emplace_back(std::make_shared(batch_id, std::move(data))); } return batches; } @@ -940,7 +940,7 @@ TEST_CASE("shared_data_repository pop_next_data_batch returns idle batch", "[dat shared_data_repository repository; auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = std::make_shared(1, std::move(data)); repository.add_data_batch(batch); auto popped = repository.pop_next_data_batch(); @@ -954,7 +954,7 @@ TEST_CASE("shared_data_repository pop_next_data_batch returns read_only batch", shared_data_repository repository; auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(2, std::move(data)); + auto batch = std::make_shared(2, std::move(data)); auto accessor = batch->to_read_only(); repository.add_data_batch(batch); @@ -968,7 +968,7 @@ TEST_CASE("shared_data_repository pop_next_data_batch returns mutable batch", "[ shared_data_repository repository; auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(3, std::move(data)); + auto batch = std::make_shared(3, std::move(data)); auto accessor = batch->to_mutable(); repository.add_data_batch(batch); @@ -983,11 +983,11 @@ TEST_CASE("shared_data_repository pop_next_data_batch FIFO regardless of state", shared_data_repository repository; auto data1 = std::make_unique(memory::Tier::GPU, 1024); - auto batch1 = std::make_shared(1, std::move(data1)); + auto batch1 = std::make_shared(1, std::move(data1)); auto data2 = std::make_unique(memory::Tier::GPU, 1024); - auto batch2 = std::make_shared(2, std::move(data2)); + auto batch2 = std::make_shared(2, std::move(data2)); auto data3 = std::make_unique(memory::Tier::GPU, 1024); - auto batch3 = std::make_shared(3, std::move(data3)); + auto batch3 = std::make_shared(3, std::move(data3)); // batch1 read_only, batch2 mutable, batch3 idle — all three should come out in order auto ro_accessor = batch1->to_read_only(); @@ -1017,9 +1017,9 @@ TEST_CASE("shared_data_repository pop_next_data_batch with partitions", "[data_r shared_data_repository repository; auto data1 = std::make_unique(memory::Tier::GPU, 1024); - auto batch1 = std::make_shared(1, std::move(data1)); + auto batch1 = std::make_shared(1, std::move(data1)); auto data2 = std::make_unique(memory::Tier::GPU, 1024); - auto batch2 = std::make_shared(2, std::move(data2)); + auto batch2 = std::make_shared(2, std::move(data2)); auto accessor = batch1->to_read_only(); repository.add_data_batch(batch1, 0); @@ -1049,9 +1049,9 @@ TEST_CASE("unique_data_repository pop_next_data_batch returns batch regardless o unique_data_repository repository; auto data1 = std::make_unique(memory::Tier::GPU, 1024); - auto batch1 = std::make_unique(1, std::move(data1)); + auto batch1 = std::make_unique(1, std::move(data1)); auto data2 = std::make_unique(memory::Tier::GPU, 1024); - auto batch2 = std::make_unique(2, std::move(data2)); + auto batch2 = std::make_unique(2, std::move(data2)); repository.add_data_batch(std::move(batch1)); repository.add_data_batch(std::move(batch2)); diff --git a/test/data/test_data_repository_manager.cpp b/test/data/test_data_repository_manager.cpp index bde2cae..a42bbe7 100644 --- a/test/data/test_data_repository_manager.cpp +++ b/test/data/test_data_repository_manager.cpp @@ -125,7 +125,7 @@ TEST_CASE("shared_data_repository_manager Add Data Batch Single Operator", // Create and add batch auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = std::make_shared(batch_id, std::move(data)); std::vector> operator_ports = {{operator_id, "default"}}; manager.add_data_batch(batch, operator_ports); @@ -152,7 +152,7 @@ TEST_CASE("shared_data_repository_manager Add Data Batch Multiple Operators", // Create and add batch to all operators auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = std::make_shared(batch_id, std::move(data)); std::vector> operator_ports; for (size_t id : operator_ids) { @@ -229,7 +229,7 @@ TEST_CASE("shared_data_repository_manager Thread-Safe Add Batch", "[data_reposit for (int j = 0; j < batches_per_thread; ++j) { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = std::make_shared(batch_id, std::move(data)); manager.add_data_batch(batch, operator_ports); } }); @@ -292,7 +292,7 @@ TEST_CASE("unique_data_repository_manager Add Data Batch Single Operator", // Create and add batch auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_unique(batch_id, std::move(data)); + auto batch = std::make_unique(batch_id, std::move(data)); std::vector> operator_ports = {{operator_id, "default"}}; manager.add_data_batch(std::move(batch), operator_ports); @@ -317,7 +317,7 @@ TEST_CASE("unique_data_repository_manager Add Batch Multiple Operators Throws", // Create batch auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_unique(batch_id, std::move(data)); + auto batch = std::make_unique(batch_id, std::move(data)); // Trying to add to multiple operators should throw std::vector> operator_ports = {{1, "default"}, @@ -341,7 +341,7 @@ TEST_CASE("unique_data_repository_manager Add Multiple Batches", "[data_reposito for (int i = 0; i < num_batches; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_unique(batch_id, std::move(data)); + auto batch = std::make_unique(batch_id, std::move(data)); manager.add_data_batch(std::move(batch), operator_ports); } @@ -377,7 +377,7 @@ TEST_CASE("shared_data_repository_manager Full Workflow", "[data_repository_mana { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = std::make_shared(batch_id, std::move(data)); std::vector> all_ports; for (size_t id : operator_ids) { all_ports.push_back({id, "default"}); @@ -389,7 +389,7 @@ TEST_CASE("shared_data_repository_manager Full Workflow", "[data_repository_mana { auto data = std::make_unique(memory::Tier::GPU, 2048); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = std::make_shared(batch_id, std::move(data)); std::vector> p0 = {{0, "default"}}; manager.add_data_batch(batch, p0); } @@ -398,7 +398,7 @@ TEST_CASE("shared_data_repository_manager Full Workflow", "[data_repository_mana { auto data = std::make_unique(memory::Tier::GPU, 4096); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = std::make_shared(batch_id, std::move(data)); std::vector> p12 = {{1, "default"}, {2, "default"}}; manager.add_data_batch(batch, p12); } @@ -475,7 +475,7 @@ TEST_CASE("shared_data_repository_manager Large Number of Batches", "[data_repos for (int i = 0; i < num_batches; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = std::make_shared(batch_id, std::move(data)); manager.add_data_batch(batch, operator_ports); } @@ -539,7 +539,7 @@ TEST_CASE("shared_data_repository_manager Thread-Safe Mixed Operations", std::vector threads; std::atomic batch_count{0}; std::mutex pull_mutex; - std::vector> all_batches; + std::vector> all_batches; // Launch threads doing mixed operations for (int i = 0; i < num_threads; ++i) { @@ -551,7 +551,7 @@ TEST_CASE("shared_data_repository_manager Thread-Safe Mixed Operations", // Add batch to random operator size_t operator_id = (i + j) % 5; auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = std::make_shared(batch_id, std::move(data)); std::vector> operator_ports = { {operator_id, "default"}}; manager.add_data_batch(batch, operator_ports); @@ -613,7 +613,7 @@ TEST_CASE("shared_data_repository_manager Concurrent Add and Pull", "[data_repos // Add batch to one or more operators size_t operator_id = (i + j) % num_operators; auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = std::make_shared(batch_id, std::move(data)); std::vector> operator_ports = { {operator_id, "default"}}; manager.add_data_batch(batch, operator_ports); @@ -705,7 +705,7 @@ TEST_CASE("shared_data_repository_manager High Contention Add Pull", "[data_repo // Add a batch uint64_t batch_id = manager.get_next_data_batch_id(); auto data = std::make_unique(memory::Tier::GPU, 512); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = std::make_shared(batch_id, std::move(data)); manager.add_data_batch(batch, operator_ports); ++total_added; @@ -760,7 +760,7 @@ TEST_CASE("shared_data_repository_manager Concurrent Add Multiple Operators Per for (int i = 0; i < num_batches; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = std::make_shared(batch_id, std::move(data)); manager.add_data_batch(batch, all_operator_ports); } @@ -861,7 +861,7 @@ TEST_CASE("unique_data_repository_manager Thread-Safe Add Batch", "[data_reposit for (int j = 0; j < batches_per_thread; ++j) { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_unique(batch_id, std::move(data)); + auto batch = std::make_unique(batch_id, std::move(data)); manager.add_data_batch(std::move(batch), operator_ports); } }); @@ -909,7 +909,7 @@ TEST_CASE("unique_data_repository_manager Concurrent Add and Pull", "[data_repos for (int j = 0; j < batches_per_adder; ++j) { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_unique(batch_id, std::move(data)); + auto batch = std::make_unique(batch_id, std::move(data)); manager.add_data_batch(std::move(batch), operator_ports); ++batches_added; @@ -989,7 +989,7 @@ TEST_CASE("unique_data_repository_manager High Contention", "[data_repository_ma // Add a batch auto data = std::make_unique(memory::Tier::GPU, 512); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_unique(batch_id, std::move(data)); + auto batch = std::make_unique(batch_id, std::move(data)); manager.add_data_batch(std::move(batch), operator_ports); ++total_added; @@ -1072,7 +1072,7 @@ TEST_CASE("shared_data_repository_manager Batches With Different Sizes", for (size_t size : sizes) { auto data = std::make_unique(memory::Tier::GPU, size); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = std::make_shared(batch_id, std::move(data)); manager.add_data_batch(batch, operator_ports); } @@ -1104,7 +1104,7 @@ TEST_CASE("shared_data_repository_manager Batches With Different Tiers", for (memory::Tier tier : tiers) { auto data = std::make_unique(tier, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = std::make_shared(batch_id, std::move(data)); manager.add_data_batch(batch, operator_ports); } @@ -1135,7 +1135,7 @@ TEST_CASE("shared_data_repository_manager Rapid Add Pull Cycles", "[data_reposit for (int cycle = 0; cycle < 100; ++cycle) { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = std::make_shared(batch_id, std::move(data)); manager.add_data_batch(batch, operator_ports); // Pull the batch From 3524f5d38b5af4adfd4f8779b9327dc6f2f8128c Mon Sep 17 00:00:00 2001 From: Dhruv Vats Date: Fri, 8 May 2026 18:32:29 +0530 Subject: [PATCH 4/7] modify add_data_batch_impl to not have to handle unique_ptr data_batch --- include/cucascade/data/data_repository.hpp | 1 - .../data/data_repository_manager.hpp | 21 +++---------------- src/data/data_repository.cpp | 19 ++++------------- src/data/data_repository_manager.cpp | 1 - 4 files changed, 7 insertions(+), 35 deletions(-) diff --git a/include/cucascade/data/data_repository.hpp b/include/cucascade/data/data_repository.hpp index 4efb2a6..acd6f51 100644 --- a/include/cucascade/data/data_repository.hpp +++ b/include/cucascade/data/data_repository.hpp @@ -273,6 +273,5 @@ class idata_repository { }; using shared_data_repository = idata_repository>; -using unique_data_repository = idata_repository>; } // namespace cucascade diff --git a/include/cucascade/data/data_repository_manager.hpp b/include/cucascade/data/data_repository_manager.hpp index 0e9927e..d2ec245 100644 --- a/include/cucascade/data/data_repository_manager.hpp +++ b/include/cucascade/data/data_repository_manager.hpp @@ -240,22 +240,8 @@ class data_repository_manager { void add_data_batch_impl(PtrType batch, std::vector>& ops) { std::lock_guard lock(_mutex); - if constexpr (std::is_copy_constructible_v) { - for (auto& op : ops) { - _repositories[{op.first, std::string(op.second)}]->add_data_batch(batch); - } - } else { - if (ops.size() > 1) { - throw std::runtime_error( - "unique_ptr data_batch can only be added to one repository. " - "Use shared_ptr for multiple destinations."); - } - if (!ops.empty()) { - auto& op = ops[0]; - _repositories[{op.first, std::string(op.second)}]->add_data_batch(std::move(batch)); - } else { - throw std::runtime_error("No operator ports provided"); - } + for (auto& op : ops) { + _repositories[{op.first, std::string(op.second)}]->add_data_batch(batch); } } @@ -267,7 +253,6 @@ class data_repository_manager { }; // Type aliases for common use cases -using shared_data_repository_manager = data_repository_manager>; -using unique_data_repository_manager = data_repository_manager>; +using shared_data_repository_manager = data_repository_manager>; } // namespace cucascade diff --git a/src/data/data_repository.cpp b/src/data/data_repository.cpp index bfdebd5..e20b730 100644 --- a/src/data/data_repository.cpp +++ b/src/data/data_repository.cpp @@ -15,19 +15,19 @@ * limitations under the License. */ +#include "cucascade/data/data_batch.hpp" + #include namespace cucascade { // Explicit template instantiations for smart pointer types template class idata_repository>; -template class idata_repository>; // Explicit specialization of get_data_batch_by_id for shared_ptr (copies the pointer) template <> -std::shared_ptr -idata_repository>::get_data_batch_by_id(uint64_t batch_id, - size_t partition_idx) +std::shared_ptr idata_repository>::get_data_batch_by_id( + uint64_t batch_id, size_t partition_idx) { std::unique_lock lock(_mutex); @@ -45,15 +45,4 @@ idata_repository>::get_data_batch_by_id(uint64_ return nullptr; } -// Explicit specialization of get_data_batch_by_id for unique_ptr (not supported) -template <> -std::unique_ptr -idata_repository>::get_data_batch_by_id(uint64_t /*batch_id*/, - size_t /*partition_idx*/) -{ - throw std::runtime_error( - "get_data_batch_by_id is not supported for unique_ptr repositories. " - "Use pop_data_batch to move ownership instead."); -} - } // namespace cucascade diff --git a/src/data/data_repository_manager.cpp b/src/data/data_repository_manager.cpp index 61fb96b..3b9e23b 100644 --- a/src/data/data_repository_manager.cpp +++ b/src/data/data_repository_manager.cpp @@ -25,6 +25,5 @@ namespace cucascade { // Explicit template instantiations for common pointer types template class data_repository_manager>; -template class data_repository_manager>; } // namespace cucascade From bffa10d9c6dbed49423811069fe389927b878084 Mon Sep 17 00:00:00 2001 From: Dhruv Vats Date: Fri, 8 May 2026 18:49:08 +0530 Subject: [PATCH 5/7] Migrate data batch tests to synchronized API Remove unique repository coverage and update docs for the data_batch accessor model. Co-authored-by: OpenAI Codex --- docs/ARCHITECTURE.md | 62 +- docs/data-management.md | 98 ++- docs/data_batch_state_transitions.md | 40 +- docs/development-guide.md | 16 +- include/cucascade/data/data_batch.hpp | 35 +- include/cucascade/data/data_repository.hpp | 10 +- .../data/data_repository_manager.hpp | 17 +- src/data/data_batch.cpp | 36 +- test/data/test_data_batch.cpp | 704 +++++++----------- test/data/test_data_repository.cpp | 462 +----------- test/data/test_data_repository_manager.cpp | 358 +-------- 11 files changed, 438 insertions(+), 1400 deletions(-) diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 38708d5..1176e12 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -203,43 +203,37 @@ sequenceDiagram **File**: `include/cucascade/data/data_batch.hpp` -A `data_batch` is the fundamental unit of data in cuCascade. It wraps a tier-specific data representation (GPU table or host table) and manages concurrent access through a RAII read-only and mutable accessor classes. +A `data_batch` is the fundamental unit of data in cuCascade. It wraps a tier-specific data representation (GPU table or host table) and manages concurrent access through RAII read-only and mutable accessor classes. ```mermaid stateDiagram-v2 direction LR - idle --> task_created : try_to_create_task() - idle --> in_transit : try_to_lock_for_in_transit() - task_created --> processing : try_to_lock_for_processing() - task_created --> in_transit : try_to_lock_for_in_transit() - task_created --> idle : try_to_cancel_task() - processing --> idle : handle destruction - processing --> task_created : try_to_create_task() - in_transit --> idle : try_to_release_in_transit() - in_transit --> task_created : try_to_release_in_transit(task_created) + idle --> read_only : get_read_only() / try_get_read_only() + idle --> mutable_locked : get_mutable() / try_get_mutable() + read_only --> idle : read_only_data_batch::to_idle() + mutable_locked --> idle : mutable_data_batch::to_idle() ``` **States**: -- **idle** -- no pending work, available for scheduling or tier movement -- **task_created** -- a task has been registered but processing hasn't started -- **processing** -- one or more RAII `data_batch_processing_handle`s are active -- **in_transit** -- locked for movement between memory tiers (no concurrent access) +- **idle** -- no active readers or writers; the synchronized handle grants no direct data access +- **read_only** -- one or more `read_only_data_batch` shared locks are active +- **mutable_locked** -- one `mutable_data_batch` exclusive lock is active -The `data_batch_processing_handle` uses RAII to ensure the processing count is always correctly decremented, even on exceptions. +The accessor objects use RAII to release locks automatically, even on exceptions. ### Data Repositories **File**: `include/cucascade/data/data_repository.hpp` -A `data_repository` is a partitioned storage for data batches. It provides blocking `pop` operations that wait until a batch reaches the requested state. +A `data_repository` is partitioned storage for synchronized data batch handles. It provides non-blocking `pop` operations that return `nullptr` when a partition is empty. ```cpp -// Pop a batch that can transition to task_created (blocks if none ready) -auto batch = repository.pop_data_batch(batch_state::task_created); +// Pop the next batch from the default partition +auto batch = repository.pop_next_data_batch(); // Pop a specific batch by ID -auto batch = repository.pop_data_batch_by_id(42, batch_state::in_transit); +auto batch = repository.pop_data_batch_by_id(42); ``` The `data_repository_manager` coordinates multiple repositories across operators/pipelines and provides atomic batch ID generation. @@ -290,25 +284,23 @@ A typical lifecycle of data through cuCascade: ``` 1. INGESTION Create data representation (e.g., gpu_table_representation wrapping a cuDF table) - -> Wrap in data_batch with unique ID from data_repository_manager + -> Wrap in data_batch::make(unique_id, representation) -> Add to data_repository -2. TASK SCHEDULING - batch.try_to_create_task() [idle -> task_created] - repository.pop_data_batch(task_created) [retrieve batch for processing] +2. REPOSITORY DISTRIBUTION + manager.add_data_batch(batch, destinations) + repository.pop_next_data_batch() [retrieve synchronized batch handle] 3. PROCESSING - batch.try_to_lock_for_processing() [task_created -> processing] - -> Returns data_batch_processing_handle (RAII) - -> Access data via batch.get_data() - -> Handle destructs on scope exit [processing -> idle] + auto ro = batch->get_read_only() [shared lock] + -> Access data via ro->get_data() + -> Accessor destructs on scope exit [read_only -> idle when last reader exits] 4. MEMORY PRESSURE (downgrade) memory_space.should_downgrade_memory() [threshold exceeded] - batch.try_to_lock_for_in_transit() [idle -> in_transit] - converter_registry.convert(...) - batch.set_data(new_representation) [data now on HOST] - batch.try_to_release_in_transit() [in_transit -> idle] + auto mut = batch->get_mutable() [exclusive lock] + mut->convert_to(...) + mutable_data_batch::to_idle(std::move(mut)) 5. DATA NEEDED (upgrade) Same flow as downgrade but in reverse tier direction @@ -326,7 +318,7 @@ cuCascade uses a strict lock hierarchy to prevent deadlocks: ``` Level 1: atomic (batch ID generation -- lock-free) | -Level 2: data_batch, read_only_data_batch, mutable_data_batch (3-class sytem provides read-only and mutable access classes) +Level 2: data_batch, read_only_data_batch, mutable_data_batch (3-class system provides read-only and mutable access classes) | Level 3: idata_repository._mutex (protects batch storage) | @@ -339,7 +331,7 @@ Level 6: memory_reservation_manager._wait_mutex (protects reservation waiting) Key synchronization primitives: - **`std::mutex`** -- guards state transitions, storage, and configuration -- **`std::condition_variable`** -- blocks repository pops and reservation requests until satisfied +- **`std::condition_variable`** -- blocks reservation requests until satisfied - **`std::atomic`** -- lock-free counters for batch IDs, allocated bytes, and peak tracking - **`atomic_bounded_counter`** -- CAS-based bounded arithmetic for reservation enforcement - **`notification_channel`** -- signals waiting threads when reservations are released @@ -352,7 +344,7 @@ Key synchronization primitives: |---------|-----------| | **Strategy** | `reservation_request_strategy` subclasses for memory selection | | **Builder** | `reservation_manager_configurator` for fluent system configuration | -| **RAII** | `data_batch_processing_handle`, `borrowed_stream`, `multiple_blocks_allocation`, `notify_on_exit` | +| **RAII** | `read_only_data_batch`, `mutable_data_batch`, `borrowed_stream`, `multiple_blocks_allocation`, `notify_on_exit` | | **Factory** | `DeviceMemoryResourceFactoryFn` for tier-specific allocator creation | | **Adapter** | `reservation_aware_resource_adaptor` wraps RMM resources with tracking | | **3-Class System** | `data_batch` with read-only and mutable class variants which provide locking and accessors | @@ -393,7 +385,7 @@ Key synchronization primitives: |------|---------| | `include/cucascade/data/common.hpp` | `idata_representation` interface | | `include/cucascade/data/data_batch.hpp` | Batch lifecycle, read-only and mutable locking accessor classes| -| `include/cucascade/data/data_repository.hpp` | Partitioned batch storage with blocking pops | +| `include/cucascade/data/data_repository.hpp` | Partitioned batch storage with non-blocking pops | | `include/cucascade/data/data_repository_manager.hpp` | Multi-pipeline repository coordination | | `include/cucascade/data/representation_converter.hpp` | Type-indexed converter registry | | `include/cucascade/data/gpu_data_representation.hpp` | GPU-resident cuDF table wrapper | diff --git a/docs/data-management.md b/docs/data-management.md index b4128d0..79c740a 100644 --- a/docs/data-management.md +++ b/docs/data-management.md @@ -13,7 +13,7 @@ A deep dive into cuCascade's data lifecycle, batch read-only and mutable locking - [Data Batch Lifecycle](#data-batch-lifecycle) - [States](#states) - [State Transitions](#state-transitions) - - [Processing Handles](#processing-handles) + - [RAII Accessor Classes](#raii-accessor-classes) - [Thread Safety](#thread-safety) - [Cloning](#cloning) - [Representation Conversion](#representation-conversion) @@ -23,7 +23,7 @@ A deep dive into cuCascade's data lifecycle, batch read-only and mutable locking - [Data Repositories](#data-repositories) - [Add and Pop Semantics](#add-and-pop-semantics) - [Partitioning](#partitioning) - - [shared_ptr vs unique_ptr Repositories](#shared_ptr-vs-unique_ptr-repositories) + - [Repository Pointer Type](#repository-pointer-type) - [Data Repository Manager](#data-repository-manager) - [Operator Port Keys](#operator-port-keys) - [Batch ID Generation](#batch-id-generation) @@ -40,7 +40,7 @@ The data module manages the lifecycle of data as it flows through processing pip - **Tier-agnostic data representations** -- abstract interface with GPU and host implementations - **Locking read-only and mutable accessor classes for batch lifecycle** -- prevents concurrent access conflicts during processing and tier movement - **Type-indexed conversion** -- extensible registry for converting data between representations -- **Partitioned repositories** -- thread-safe storage with blocking retrieval +- **Partitioned repositories** -- thread-safe storage with non-blocking retrieval - **Multi-pipeline coordination** -- manages data across operators with atomic ID generation The data module depends on the memory module for allocators, memory spaces, and CUDA streams. @@ -176,24 +176,22 @@ appropriate lock — the idle `data_batch` pointer grants no data access. stateDiagram-v2 direction LR - idle --> read_only : to_read_only() / try_to_read_only() - idle --> mutable_locked : to_mutable() / try_to_mutable() + idle --> read_only : get_read_only() / try_get_read_only() + idle --> mutable_locked : get_mutable() / try_get_mutable() - read_only --> idle : to_idle(read_only_data_batch&&) [last reader] - read_only --> mutable_locked : readonly_to_mutable(read_only_data_batch&&) + read_only --> idle : read_only_data_batch::to_idle(read_only_data_batch&&) [last reader] - mutable_locked --> idle : to_idle(mutable_data_batch&&) - mutable_locked --> read_only : mutable_to_readonly(mutable_data_batch&&) + mutable_locked --> idle : mutable_data_batch::to_idle(mutable_data_batch&&) ``` **Key rules**: -- Non-static transitions (`to_read_only`, `to_mutable`, `try_to_*`) use `shared_from_this()` and do not consume the caller's `shared_ptr`. -- Static transitions (`to_idle`, `readonly_to_mutable`, `mutable_to_readonly`) consume the accessor via `&&`, making the source null at the call site — the compiler enforces that you cannot use an accessor after releasing it. -- `to_read_only()` / `to_mutable()` **block** until the lock is available. -- `try_to_read_only()` / `try_to_mutable()` are **non-blocking** and return `std::nullopt` on failure. +- Access acquisition (`get_read_only`, `get_mutable`, `try_get_*`) uses the caller's `shared_ptr` and does not consume it. +- `read_only_data_batch::to_idle()` and `mutable_data_batch::to_idle()` consume the accessor via `&&`, making the source null at the call site. +- `get_read_only()` and `get_mutable()` **block** until the lock is available. +- `try_get_read_only()` and `try_get_mutable()` are **non-blocking** and return `std::nullopt` on failure. - Multiple `read_only_data_batch` handles may coexist on the same batch (concurrent reads). - `mutable_data_batch` is exclusive: it cannot coexist with any other reader or writer. -- Copying a `read_only_data_batch` acquires a new shared lock on the parent, incrementing `_read_only_count`. +- `read_only_data_batch` is move-only; use `clone_read_only_access()` to acquire another shared lock on the same batch. See [data_batch_state_transitions.md](data_batch_state_transitions.md) for the complete reference. @@ -205,47 +203,40 @@ Access to batch data is only possible through one of two RAII accessor classes: ```cpp // Acquire shared lock (blocks if exclusive lock held) -read_only_data_batch ro = batch->to_read_only(); +read_only_data_batch ro = batch->get_read_only(); // Access data -auto* data = ro.get_data()->cast(); +auto* data = ro->get_data()->cast(); process(data->get_table_view()); // Release: either let ro go out of scope, or explicitly return to idle -auto idle_batch = cucascade::data_batch::to_idle(std::move(ro)); +auto idle_batch = cucascade::read_only_data_batch::to_idle(std::move(ro)); ``` Properties: -- **Copyable** — each copy acquires a new shared lock; `_read_only_count` increments per copy. -- **Movable** — moves transfer lock ownership without changing the count. +- **Move-only** — moves transfer lock ownership without changing the count. +- `clone_read_only_access()` acquires another shared lock when a second reader handle is needed. - Destruction or `to_idle()` decrements `_read_only_count`; the batch returns to `idle` when the count reaches zero. **`mutable_data_batch`** — exclusive (write) lock: ```cpp // Acquire exclusive lock (blocks until all readers and writers release) -mutable_data_batch mut = batch->to_mutable(); +mutable_data_batch mut = batch->get_mutable(); // Read and write data -mut.set_data(std::move(new_representation)); +mut->set_data(std::move(new_representation)); // Release back to idle -auto idle_batch = cucascade::data_batch::to_idle(std::move(mut)); +auto idle_batch = cucascade::mutable_data_batch::to_idle(std::move(mut)); ``` Properties: - **Move-only** — no copies allowed; only one exclusive lock can exist at a time. - Destruction or `to_idle()` releases the exclusive lock. -**Upgrade / Downgrade**: - -```cpp -// Upgrade: shared → exclusive (releases shared, acquires exclusive — may block) -mutable_data_batch mut = cucascade::data_batch::readonly_to_mutable(std::move(ro)); - -// Downgrade: exclusive → shared (releases exclusive, acquires shared — may block) -read_only_data_batch ro2 = cucascade::data_batch::mutable_to_readonly(std::move(mut)); -``` +There is no direct read-to-write upgrade or write-to-read downgrade API. Release the current +accessor first, then acquire the next access mode from the returned `shared_ptr`. ### Thread Safety @@ -266,12 +257,12 @@ This is used by the pipeline and downgrade executor to track which batches are s ### Cloning ```cpp -// Via read-only accessor (caller already holds lock) -read_only_data_batch ro = batch->to_read_only(); -auto cloned = ro.clone(new_batch_id, stream); +// Acquire read access while inspecting the source, then clone from the synchronized handle. +read_only_data_batch ro = batch->get_read_only(); +auto cloned = batch->clone(new_batch_id, stream); // With representation conversion -auto cloned = ro.clone_to(registry, new_batch_id, host_space, stream); +auto cloned = batch->clone_to(registry, new_batch_id, host_space, stream); ``` Cloning produces a new `shared_ptr` in `idle` state with the given ID. The clone @@ -380,8 +371,8 @@ repository.add_data_batch(batch_ptr, partition_idx); // Pop the next batch from a partition (non-blocking; returns nullptr if empty) auto batch = repository.pop_next_data_batch(partition_idx); if (batch) { - auto ro = batch->to_read_only(); // acquire shared lock - process(ro.get_data()); + auto ro = batch->get_read_only(); // acquire shared lock + process(ro->get_data()); } // Pop a specific batch by ID (returns nullptr if not found) @@ -406,17 +397,17 @@ repository.add_data_batch(batch_a, 0); repository.add_data_batch(batch_b, 1); // Pop from partition 0 only -auto batch = repository.pop_data_batch(batch_state::task_created, 0); +auto batch = repository.pop_next_data_batch(0); ``` -### shared_ptr vs unique_ptr Repositories +### Repository Pointer Type | Type | Alias | Use Case | |------|-------|----------| | `idata_repository>` | `shared_data_repository` | Same batch shared across multiple repositories (fan-out) | -| `idata_repository>` | `unique_data_repository` | Each batch owned by exactly one repository | -Key difference: `get_data_batch_by_id()` (non-removing access) is only available with `shared_ptr` repositories. +Repositories store synchronized `data_batch` handles as `std::shared_ptr`. The +non-removing `get_data_batch_by_id()` API returns another copy of that synchronized handle. --- @@ -451,14 +442,12 @@ uint64_t id = manager.get_next_data_batch_id(); // atomic increment The `add_data_batch()` method distributes a batch to one or more repositories: ```cpp -// shared_ptr: same batch goes to multiple repositories +// Same synchronized batch handle goes to multiple repositories manager.add_data_batch(shared_batch, {{0, "output"}, {1, "input"}}); - -// unique_ptr: batch goes to exactly one repository (throws if multiple specified) -manager.add_data_batch(std::move(unique_batch), {{1, "input"}}); ``` -The manager also provides `get_data_batches_for_downgrade()` to find batches eligible for tier demotion based on their memory space. +Repository managers only distribute and retrieve synchronized batch handles. Tier demotion is +performed by acquiring mutable access to a specific batch and converting its representation. --- @@ -472,9 +461,7 @@ The data and memory modules are connected at several points: 3. **Reservation tracking** -- when data is converted between tiers, the converter allocates in the target memory space using its allocator and reservation system -4. **Downgrade coordination** -- the application queries `memory_space.should_downgrade_memory()` and uses `data_repository_manager.get_data_batches_for_downgrade()` to find candidates - -5. **Processing validation** -- `try_to_lock_for_processing()` checks that the requested `memory_space_id` matches the batch's current location +4. **Tier movement** -- the application queries memory pressure, acquires mutable batch access, and calls `convert_to()` through the accessor ``` Application @@ -488,17 +475,18 @@ Application |-- data_repository.pop_next_data_batch(partition_idx) | |-- returns shared_ptr (idle, non-blocking) | - |-- batch->to_read_only() [shared lock] + |-- batch->get_read_only() [shared lock] | |-- returns read_only_data_batch (blocks until available) - | |-- ro.get_data() grants read access to idata_representation + | |-- ro->get_data() grants read access to idata_representation | |-- [on memory pressure] - | mutable_data_batch mut = data_batch::readonly_to_mutable(std::move(ro)) - | mut.convert_to(registry, target_space, stream) + | read_only_data_batch::to_idle(std::move(ro)) [release shared lock] + | mutable_data_batch mut = batch->get_mutable() [exclusive lock] + | mut->convert_to(registry, target_space, stream) | |-- converter_registry.convert(data, target_space, stream) | |-- allocates in target memory_space | |-- cudaMemcpy between tiers - | data_batch::to_idle(std::move(mut)) [release exclusive lock] + | mutable_data_batch::to_idle(std::move(mut)) [release exclusive lock] ``` --- @@ -509,7 +497,7 @@ Application |------|---------| | `include/cucascade/data/common.hpp` | `idata_representation` abstract interface | | `include/cucascade/data/data_batch.hpp` | `data_batch`, `read_only_data_batch`, `mutable_data_batch`, `batch_state` | -| `include/cucascade/data/data_repository.hpp` | `idata_repository`, `shared_data_repository`, `unique_data_repository` | +| `include/cucascade/data/data_repository.hpp` | `idata_repository`, `shared_data_repository` | | `include/cucascade/data/data_repository_manager.hpp` | `data_repository_manager`, `operator_port_key` | | `include/cucascade/data/representation_converter.hpp` | `representation_converter_registry`, `converter_key` | | `include/cucascade/data/gpu_data_representation.hpp` | `gpu_table_representation` wrapping `cudf::table` | diff --git a/docs/data_batch_state_transitions.md b/docs/data_batch_state_transitions.md index 9d195e5..7677a6a 100644 --- a/docs/data_batch_state_transitions.md +++ b/docs/data_batch_state_transitions.md @@ -24,29 +24,27 @@ no access to the underlying data. ### Allowed Transitions -#### Non-static (via `shared_from_this` — caller's `shared_ptr` is NOT consumed) +#### Access Acquisition (caller's `shared_ptr` is not consumed) | From | To | Method | Blocking | |------|----|--------|----------| -| `idle` | `read_only` | `to_read_only()` | Yes — blocks until shared lock available | -| `idle` | `read_only` | `try_to_read_only()` | No — returns `std::nullopt` if exclusive lock held | -| `idle` | `mutable_locked` | `to_mutable()` | Yes — blocks until exclusive lock available | -| `idle` | `mutable_locked` | `try_to_mutable()` | No — returns `std::nullopt` if any lock held | +| `idle` | `read_only` | `get_read_only()` | Yes — blocks until shared lock available | +| `idle` | `read_only` | `try_get_read_only()` | No — returns `std::nullopt` if exclusive lock held | +| `idle` | `mutable_locked` | `get_mutable()` | Yes — blocks until exclusive lock available | +| `idle` | `mutable_locked` | `try_get_mutable()` | No — returns `std::nullopt` if any lock held | -#### Static (consume accessor via `&&` — source is left null) +#### Access Release (consume accessor via `&&`; source is left null) | From | To | Method | Blocking | Notes | |------|----|--------|----------|-------| -| `read_only` | `idle` | `to_idle(read_only_data_batch&&)` | No | Returns `shared_ptr`; state becomes `idle` when last reader releases | -| `mutable_locked` | `idle` | `to_idle(mutable_data_batch&&)` | No | Releases exclusive lock; returns `shared_ptr` | -| `read_only` | `mutable_locked` | `readonly_to_mutable(read_only_data_batch&&)` | Yes — blocks until exclusive lock available | Releases shared lock first, then acquires exclusive | -| `mutable_locked` | `read_only` | `mutable_to_readonly(mutable_data_batch&&)` | Yes — blocks until shared lock available | Releases exclusive lock first, then acquires shared | +| `read_only` | `idle` | `read_only_data_batch::to_idle(read_only_data_batch&&)` | No | Returns `shared_ptr`; state becomes `idle` when last reader releases | +| `mutable_locked` | `idle` | `mutable_data_batch::to_idle(mutable_data_batch&&)` | No | Releases exclusive lock; returns `shared_ptr` | ### Disallowed / Non-Transitions -- `try_to_read_only()` returns `std::nullopt` when a `mutable_data_batch` is active (exclusive lock held). -- `try_to_mutable()` returns `std::nullopt` when any lock is held (shared or exclusive). -- `to_read_only()` and `to_mutable()` on a batch not managed by `shared_ptr` throw `std::bad_weak_ptr` (they require `shared_from_this()`). +- `try_get_read_only()` returns `std::nullopt` when a `mutable_data_batch` is active (exclusive lock held). +- `try_get_mutable()` returns `std::nullopt` when any lock is held (shared or exclusive). +- There is no direct read-to-write upgrade or write-to-read downgrade API. Release the current accessor, then acquire the next access mode from the returned `shared_ptr`. ### Subscriber Counting @@ -66,24 +64,22 @@ Independent of the locking model, a batch maintains an atomic subscriber interes stateDiagram-v2 direction LR - idle --> read_only : to_read_only() [blocking]\ntry_to_read_only() [non-blocking] - idle --> mutable_locked : to_mutable() [blocking]\ntry_to_mutable() [non-blocking] + idle --> read_only : get_read_only() [blocking]\ntry_get_read_only() [non-blocking] + idle --> mutable_locked : get_mutable() [blocking]\ntry_get_mutable() [non-blocking] - read_only --> idle : to_idle(read_only_data_batch&&)\n[last reader → idle] - read_only --> mutable_locked : readonly_to_mutable(read_only_data_batch&&)\n[releases shared, acquires exclusive] + read_only --> idle : read_only_data_batch::to_idle(read_only_data_batch&&)\n[last reader -> idle] - mutable_locked --> idle : to_idle(mutable_data_batch&&) - mutable_locked --> read_only : mutable_to_readonly(mutable_data_batch&&)\n[releases exclusive, acquires shared] + mutable_locked --> idle : mutable_data_batch::to_idle(mutable_data_batch&&) note right of read_only Multiple read_only_data_batch handles may coexist (concurrent reads). - try_to_mutable() returns nullopt. + try_get_mutable() returns nullopt. end note note right of mutable_locked Exclusive access. - try_to_read_only() returns nullopt. - try_to_mutable() returns nullopt. + try_get_read_only() returns nullopt. + try_get_mutable() returns nullopt. end note ``` diff --git a/docs/development-guide.md b/docs/development-guide.md index 79628d9..b13c3e4 100644 --- a/docs/development-guide.md +++ b/docs/development-guide.md @@ -211,14 +211,14 @@ cuCascade uses [Catch2](https://github.com/catchorg/Catch2) v2.13.10 (fetched fr Tests use BDD-style assertions: ```cpp -TEST_CASE("data batch transitions to task_created", "[data_batch]") { - auto batch = data_batch(1, make_gpu_representation()); +TEST_CASE("data batch acquires read access", "[data_batch]") { + auto batch = data_batch::make(1, make_gpu_representation()); - REQUIRE(batch.get_state() == batch_state::idle); + REQUIRE(batch->get_state() == batch_state::idle); - bool success = batch.try_to_create_task(); - REQUIRE(success); - REQUIRE(batch.get_state() == batch_state::task_created); + auto ro = batch->get_read_only(); + REQUIRE(batch->get_state() == batch_state::read_only); + REQUIRE(ro->get_batch_id() == 1); } ``` @@ -228,8 +228,8 @@ All tests compile into a single executable `cucascade_tests`: | Directory | Tests | Coverage | |-----------|-------|----------| -| `test/data/test_data_batch.cpp` | 47+ cases | State transitions, processing handles, cloning | -| `test/data/test_data_repository.cpp` | 140+ cases | Add/pop, partitioning, blocking, threading | +| `test/data/test_data_batch.cpp` | 47+ cases | Accessor transitions, locking, cloning | +| `test/data/test_data_repository.cpp` | 140+ cases | Add/pop, partitioning, threading | | `test/data/test_data_repository_manager.cpp` | 100+ cases | Multi-operator, batch IDs, concurrent access | | `test/data/test_data_representation.cpp` | Representation interface | Size, tier, clone operations | | `test/data/test_representation_converter.cpp` | Converter registry | Registration, lookup, conversion | diff --git a/include/cucascade/data/data_batch.hpp b/include/cucascade/data/data_batch.hpp index 2427e2b..f2ae38f 100644 --- a/include/cucascade/data/data_batch.hpp +++ b/include/cucascade/data/data_batch.hpp @@ -53,15 +53,14 @@ class mutable_data_batch; class data_batch; /** - * @brief Core data batch type representing the "idle" (unlocked) state. + * @brief Internal data batch payload owned by data_batch. * - * Owns the data representation, a reader-writer mutex, and subscriber bookkeeping. - * Almost nothing is publicly accessible -- data, tier, and memory space are private - * and can only be reached through RAII accessor types that hold the appropriate lock. + * Owns the data representation. Data, tier, and memory-space access are exposed here, + * but this core object is only reachable through RAII accessor types that hold the + * appropriate lock. * - * State transitions are static methods that move ownership of the accessor, - * making the source null at the call site. This provides compile-time enforcement:1 - * once a batch is locked, the caller cannot access the idle handle. + * State transitions and synchronization live on data_batch. This core object only + * exposes data/tier/memory methods to friend accessor classes. * * @note Non-copyable and non-movable. The object itself never moves; only the * smart pointer to it is transferred between states. @@ -157,10 +156,10 @@ class data_batch_core { }; /** - * @brief Synchronized data batch type allowing thread safe access to the code data batch - * via RAII accessers. + * @brief Synchronized data batch type allowing thread-safe access to the core data batch + * via RAII accessors. */ -class data_batch : std::enable_shared_from_this { +class data_batch : public std::enable_shared_from_this { friend read_only_data_batch; friend mutable_data_batch; @@ -317,13 +316,11 @@ class data_batch : std::enable_shared_from_this { * @brief RAII read-only accessor for data_batch. * * Holds a shared lock on the parent data_batch's mutex, permitting concurrent - * readers. Data is accessible through named methods that delegate to data_batch's - * private interface. Clone operations are available to create independent copies - * while the read lock is held. + * readers. Data is accessible through operator forwarding to data_batch_core. * - * Copyable. Copying acquires a new shared lock on the same parent data_batch, - * incrementing the reader count. The shared lock is released when this object - * is destroyed, moved-from, or overwritten by assignment. + * Move-only. Use clone_read_only_access() to acquire an additional shared lock + * on the same parent batch. The shared lock is released when this object is + * destroyed, moved-from, or overwritten by assignment. */ class read_only_data_batch { friend class data_batch; @@ -370,7 +367,7 @@ class read_only_data_batch { read_only_data_batch(std::shared_ptr parent, std::shared_lock lock); - // INVARIANT: _batch must be declared before _lock -- destruction order is load-bearing. + // INVARIANT: _owner must be declared before _lock -- destruction order is load-bearing. // When destroyed, _lock releases the shared lock first, then _batch drops the parent // reference. This prevents accessing a destroyed mutex. std::shared_ptr _owner; ///< Parent lifetime (destroyed second) @@ -406,7 +403,7 @@ class mutable_data_batch { std::shared_ptr ptr = accessor._owner; { auto _ = std::move(accessor); - } // destroy accessor, releasing shared lock + } // destroy accessor, releasing exclusive lock return ptr; } @@ -419,7 +416,7 @@ class mutable_data_batch { */ mutable_data_batch(std::shared_ptr parent, std::unique_lock lock); - // INVARIANT: _batch must be declared before _lock -- destruction order is load-bearing. + // INVARIANT: _owner must be declared before _lock -- destruction order is load-bearing. // When destroyed, _lock releases the exclusive lock first, then _batch drops the parent // reference. This prevents accessing a destroyed mutex. std::shared_ptr _owner; ///< Parent lifetime (destroyed second) diff --git a/include/cucascade/data/data_repository.hpp b/include/cucascade/data/data_repository.hpp index acd6f51..0b852f3 100644 --- a/include/cucascade/data/data_repository.hpp +++ b/include/cucascade/data/data_repository.hpp @@ -45,7 +45,7 @@ namespace cucascade { * - Thread-safe access to shared data structures * * @tparam PtrType The smart pointer type used to manage data_batch lifecycle. - * Typically std::shared_ptr or std::unique_ptr. + * The public repository alias uses std::shared_ptr. * * @note Implementations must be thread-safe as multiple threads may access * the repository concurrently during query execution. @@ -66,11 +66,10 @@ class idata_repository { /** * @brief Add a new data batch to this repository. * - * The repository takes ownership of the data_batch (for unique_ptr) or shares - * ownership (for shared_ptr) and will manage its lifecycle according to the + * The repository stores the data_batch pointer and manages it according to the * implementation's storage policy. * - * @param batch Smart pointer to the data_batch to add (ownership transferred/shared) + * @param batch Smart pointer to the data_batch to add * @param partition_idx Index of the partition to add the batch to (default: 0) * * @note Thread-safe operation protected by internal mutex @@ -158,8 +157,7 @@ class idata_repository { * @return PtrType A copy of the data batch pointer with the matching batch_id, or nullptr * * @note Thread-safe operation protected by internal mutex - * @note Only supported for shared_ptr repositories. Will throw for unique_ptr repositories. - * @throws std::runtime_error if called on unique_ptr repository + * @note Supported for shared_ptr repositories by returning a copy of the pointer. * @throws std::out_of_range if partition_idx is out of range */ virtual PtrType get_data_batch_by_id(uint64_t batch_id, size_t partition_idx = 0); diff --git a/include/cucascade/data/data_repository_manager.hpp b/include/cucascade/data/data_repository_manager.hpp index d2ec245..822a5e9 100644 --- a/include/cucascade/data/data_repository_manager.hpp +++ b/include/cucascade/data/data_repository_manager.hpp @@ -43,7 +43,9 @@ struct operator_port_key { std::string port_id; bool operator==(const operator_port_key& other) const - { return operator_id == other.operator_id && port_id == other.port_id; } + { + return operator_id == other.operator_id && port_id == other.port_id; + } bool operator<(const operator_port_key& other) const { @@ -76,7 +78,7 @@ struct operator_port_key { * a unified interface for higher-level components like the GPU executor and memory manager. * * @tparam PtrType The smart pointer type used to manage data_batch lifecycle. - * Typically std::shared_ptr or std::unique_ptr. + * The public manager alias uses std::shared_ptr. * * @note All operations are thread-safe and can be called concurrently from multiple * pipeline execution threads. @@ -125,8 +127,7 @@ class data_repository_manager { /** * @brief Add a data_batch to specified operator repositories. * - * For shared_ptr: The batch is copied to each repository. - * For unique_ptr: This method requires only one operator (single owner semantics). + * The shared batch pointer is copied to each destination repository. * * @param batch The data_batch smart pointer to add * @param ops The operator IDs and ports whose repositories will receive this batch @@ -134,7 +135,9 @@ class data_repository_manager { * @note Thread-safe operation */ void add_data_batch(PtrType batch, std::vector> ops) - { add_data_batch_impl(std::move(batch), ops); } + { + add_data_batch_impl(std::move(batch), ops); + } /** * @brief Get direct access to a repository for advanced operations. @@ -151,7 +154,9 @@ class data_repository_manager { * safety */ std::unique_ptr& get_repository(size_t operator_id, std::string_view port_id) - { return _repositories.at({operator_id, std::string(port_id)}); } + { + return _repositories.at({operator_id, std::string(port_id)}); + } /** * @brief Generate a globally unique data batch identifier. diff --git a/src/data/data_batch.cpp b/src/data/data_batch.cpp index f5024ba..da41c09 100644 --- a/src/data/data_batch.cpp +++ b/src/data/data_batch.cpp @@ -19,7 +19,7 @@ namespace cucascade { -// ========== data_batch ========== +// ========== data_batch_core ========== uint64_t data_batch_core::get_batch_id() const { return _batch_id; } @@ -28,21 +28,21 @@ memory::Tier data_batch_core::get_current_tier() const { return _data->get_curre idata_representation* data_batch_core::get_data() const { return _data.get(); } memory::memory_space* data_batch_core::get_memory_space() const -{ return &(_data->get_memory_space()); } +{ + return &(_data->get_memory_space()); +} data_batch_core::data_batch_core(uint64_t batch_id, std::unique_ptr data) : _batch_id(batch_id), _data(std::move(data)) { } -// ========== synchronized_data_batch ========== +// ========== data_batch ========== std::shared_ptr data_batch::make(uint64_t batch_id, std::unique_ptr data) { - if (data == nullptr) { - throw std::runtime_error("data is null in synchronized_data_batch constructor"); - } + if (data == nullptr) { throw std::runtime_error("data is null in data_batch constructor"); } return std::shared_ptr(new data_batch(batch_id, std::move(data))); } @@ -132,26 +132,6 @@ read_only_data_batch& read_only_data_batch::operator=(read_only_data_batch&& oth return *this; } -// read_only_data_batch& read_only_data_batch::operator=(const read_only_data_batch& other) -// { -// if (this != &other) { -// if (_batch) { -// auto prev = _batch->_read_only_count.fetch_sub(1); -// if (prev == 1) { _batch->_state.store(batch_state::idle); } -// _lock.unlock(); -// } -// _batch = other._batch; -// if (_batch) { -// _lock = std::shared_lock(_batch->_rw_mutex); -// _batch->_read_only_count.fetch_add(1); -// _batch->_state.store(batch_state::read_only); -// } else { -// _lock = std::shared_lock(); -// } -// } -// return *this; -// } - read_only_data_batch::read_only_data_batch(std::shared_ptr owner, std::shared_lock lock) : _owner(std::move(owner)), _lock(std::move(lock)) @@ -195,6 +175,8 @@ mutable_data_batch& mutable_data_batch::operator=(mutable_data_batch&& other) no mutable_data_batch::mutable_data_batch(std::shared_ptr owner, std::unique_lock lock) : _owner(std::move(owner)), _lock(std::move(lock)) -{ _owner->_state.store(batch_state::mutable_locked); } +{ + _owner->_state.store(batch_state::mutable_locked); +} } // namespace cucascade diff --git a/test/data/test_data_batch.cpp b/test/data/test_data_batch.cpp index 5b2414e..f588a35 100644 --- a/test/data/test_data_batch.cpp +++ b/test/data/test_data_batch.cpp @@ -50,22 +50,18 @@ using cucascade::test::mock_data_representation; // Construction tests (TEST-01) // ============================================================================= -TEST_CASE("data_batch construction via shared_ptr", "[data_batch]") +TEST_CASE("data_batch construction via factory", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 2048); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); REQUIRE(batch->get_batch_id() == 1); REQUIRE(batch->get_subscriber_count() == 0); } -TEST_CASE("data_batch construction via unique_ptr", "[data_batch]") +TEST_CASE("data_batch construction rejects null data", "[data_batch]") { - auto data = std::make_unique(memory::Tier::GPU, 2048); - auto batch = std::make_unique(1, std::move(data)); - - REQUIRE(batch->get_batch_id() == 1); - REQUIRE(batch->get_subscriber_count() == 0); + REQUIRE_THROWS_AS(data_batch::make(1, nullptr), std::runtime_error); } // ============================================================================= @@ -78,6 +74,13 @@ TEST_CASE("data_batch is non-copyable and non-movable", "[data_batch]") static_assert(!std::is_move_constructible_v); static_assert(!std::is_copy_assignable_v); static_assert(!std::is_move_assignable_v); + + static_assert(!std::is_copy_constructible_v); + static_assert(!std::is_move_constructible_v); + static_assert(!std::is_copy_assignable_v); + static_assert(!std::is_move_assignable_v); + static_assert( + !std::is_constructible_v>); } // ============================================================================= @@ -87,112 +90,112 @@ TEST_CASE("data_batch is non-copyable and non-movable", "[data_batch]") TEST_CASE("data_batch get_batch_id is lock-free via shared_ptr", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(99, std::move(data)); + auto batch = data_batch::make(99, std::move(data)); // get_batch_id works without acquiring any lock REQUIRE(batch->get_batch_id() == 99); // Also works through the mutable accessor auto data2 = std::make_unique(memory::Tier::GPU, 1024); - auto batch2 = std::make_shared(99, std::move(data2)); - auto rw = batch2->to_mutable(); - REQUIRE(rw.get_batch_id() == 99); + auto batch2 = data_batch::make(99, std::move(data2)); + auto rw = batch2->get_mutable(); + REQUIRE(rw->get_batch_id() == 99); } // ============================================================================= // read_only_data_batch tests (TEST-01) // ============================================================================= -TEST_CASE("data_batch to_read_only acquires shared access", "[data_batch]") +TEST_CASE("data_batch get_read_only acquires shared access", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); - auto ro = batch->to_read_only(); - REQUIRE(ro.get_batch_id() == 1); - REQUIRE(ro.get_current_tier() == memory::Tier::GPU); + auto ro = batch->get_read_only(); + REQUIRE(ro->get_batch_id() == 1); + REQUIRE(ro->get_current_tier() == memory::Tier::GPU); } TEST_CASE("data_batch multiple concurrent read_only via shared_ptr copies", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); - auto ro1 = batch->to_read_only(); - auto ro2 = batch->to_read_only(); - auto ro3 = batch->to_read_only(); + auto ro1 = batch->get_read_only(); + auto ro2 = batch->get_read_only(); + auto ro3 = batch->get_read_only(); - REQUIRE(ro1.get_batch_id() == 1); - REQUIRE(ro2.get_batch_id() == 1); - REQUIRE(ro3.get_batch_id() == 1); + REQUIRE(ro1->get_batch_id() == 1); + REQUIRE(ro2->get_batch_id() == 1); + REQUIRE(ro3->get_batch_id() == 1); } // ============================================================================= // Try variants (TEST-04) // ============================================================================= -TEST_CASE("data_batch try_to_read_only succeeds when unlocked", "[data_batch]") +TEST_CASE("data_batch try_get_read_only succeeds when unlocked", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); - auto result = batch->try_to_read_only(); + auto result = batch->try_get_read_only(); REQUIRE(result.has_value()); - REQUIRE(result->get_batch_id() == 1); + REQUIRE((*result)->get_batch_id() == 1); } -TEST_CASE("data_batch try_to_read_only fails when mutable lock held", "[data_batch]") +TEST_CASE("data_batch try_get_read_only fails when mutable lock held", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); - auto rw = batch->to_mutable(); + auto rw = batch->get_mutable(); std::atomic got_lock{false}; std::thread t([&batch, &got_lock]() { - auto result = batch->try_to_read_only(); + auto result = batch->try_get_read_only(); got_lock.store(result.has_value()); }); t.join(); REQUIRE(got_lock.load() == false); } -TEST_CASE("data_batch try_to_mutable succeeds when unlocked", "[data_batch]") +TEST_CASE("data_batch try_get_mutable succeeds when unlocked", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); - auto result = batch->try_to_mutable(); + auto result = batch->try_get_mutable(); REQUIRE(result.has_value()); - REQUIRE(result->get_batch_id() == 1); + REQUIRE((*result)->get_batch_id() == 1); } -TEST_CASE("data_batch try_to_mutable fails when readonly lock held", "[data_batch]") +TEST_CASE("data_batch try_get_mutable fails when readonly lock held", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); - auto ro = batch->to_read_only(); + auto ro = batch->get_read_only(); std::atomic got_lock{false}; std::thread t([&batch, &got_lock]() { - auto result = batch->try_to_mutable(); + auto result = batch->try_get_mutable(); got_lock.store(result.has_value()); }); t.join(); REQUIRE(got_lock.load() == false); } -TEST_CASE("data_batch try_to_mutable fails when mutable lock held", "[data_batch]") +TEST_CASE("data_batch try_get_mutable fails when mutable lock held", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); - auto rw = batch->to_mutable(); + auto rw = batch->get_mutable(); std::atomic got_lock{false}; std::thread t([&batch, &got_lock]() { - auto result = batch->try_to_mutable(); + auto result = batch->try_get_mutable(); got_lock.store(result.has_value()); }); t.join(); @@ -203,27 +206,27 @@ TEST_CASE("data_batch try_to_mutable fails when mutable lock held", "[data_batch // mutable_data_batch tests (TEST-01) // ============================================================================= -TEST_CASE("data_batch to_mutable acquires exclusive access", "[data_batch]") +TEST_CASE("data_batch get_mutable acquires exclusive access", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); - auto rw = batch->to_mutable(); - REQUIRE(rw.get_batch_id() == 1); + auto rw = batch->get_mutable(); + REQUIRE(rw->get_batch_id() == 1); } TEST_CASE("data_batch mutable blocks until readonly released", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); // Acquire read-only on a heap-allocated accessor so we can control its lifetime - auto ro = std::make_unique(batch->to_read_only()); + auto ro = std::make_unique(batch->get_read_only()); std::atomic got_mutable{false}; std::thread writer([&batch, &got_mutable]() { - auto rw = batch->to_mutable(); + auto rw = batch->get_mutable(); got_mutable.store(true); }); @@ -242,23 +245,23 @@ TEST_CASE("data_batch mutable blocks until readonly released", "[data_batch]") TEST_CASE("data_batch mutable to readonly through idle", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); - auto rw = batch->to_mutable(); - auto idle = data_batch::to_idle(std::move(rw)); - auto ro = idle->to_read_only(); - REQUIRE(ro.get_batch_id() == 1); + auto rw = batch->get_mutable(); + auto idle = mutable_data_batch::to_idle(std::move(rw)); + auto ro = idle->get_read_only(); + REQUIRE(ro->get_batch_id() == 1); } TEST_CASE("data_batch readonly to mutable through idle", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); - auto ro = batch->to_read_only(); - auto idle = data_batch::to_idle(std::move(ro)); - auto rw = idle->to_mutable(); - REQUIRE(rw.get_batch_id() == 1); + auto ro = batch->get_read_only(); + auto idle = read_only_data_batch::to_idle(std::move(ro)); + auto rw = idle->get_mutable(); + REQUIRE(rw->get_batch_id() == 1); } // ============================================================================= @@ -275,10 +278,10 @@ TEST_CASE("data_batch destruction order safety", "[data_batch]") // If the order were reversed, the mutex would be destroyed before the lock // releases, causing undefined behavior detectable by TSan/ASan. auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); // Create accessor -- this is now the ONLY shared_ptr holding the batch alive. - auto ro = batch->to_read_only(); + auto ro = batch->get_read_only(); batch.reset(); // batch is null now. The only reference to the data_batch is inside ro._batch. @@ -292,7 +295,7 @@ TEST_CASE("data_batch destruction order safety", "[data_batch]") TEST_CASE("data_batch subscribe always succeeds", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); REQUIRE(batch->get_subscriber_count() == 0); batch->subscribe(); @@ -304,7 +307,7 @@ TEST_CASE("data_batch subscribe always succeeds", "[data_batch]") TEST_CASE("data_batch unsubscribe decrements count", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); batch->subscribe(); batch->subscribe(); @@ -319,7 +322,7 @@ TEST_CASE("data_batch unsubscribe decrements count", "[data_batch]") TEST_CASE("data_batch unsubscribe throws at zero", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); REQUIRE_THROWS_AS(batch->unsubscribe(), std::runtime_error); REQUIRE(batch->get_subscriber_count() == 0); @@ -328,7 +331,7 @@ TEST_CASE("data_batch unsubscribe throws at zero", "[data_batch]") TEST_CASE("data_batch subscriber count thread safety", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); constexpr int num_threads = 10; constexpr int subs_per_thread = 100; @@ -370,15 +373,15 @@ TEST_CASE("data_batch subscriber count thread safety", "[data_batch]") TEST_CASE("data_batch set_data via mutable accessor", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); - auto rw = batch->to_mutable(); - REQUIRE(rw.get_current_tier() == memory::Tier::GPU); - rw.set_data(std::make_unique(memory::Tier::HOST, 2048)); - batch = data_batch::to_idle(std::move(rw)); + auto rw = batch->get_mutable(); + REQUIRE(rw->get_current_tier() == memory::Tier::GPU); + rw->set_data(std::make_unique(memory::Tier::HOST, 2048)); + batch = mutable_data_batch::to_idle(std::move(rw)); - auto ro = batch->to_read_only(); - REQUIRE(ro.get_current_tier() == memory::Tier::HOST); + auto ro = batch->get_read_only(); + REQUIRE(ro->get_current_tier() == memory::Tier::HOST); } // ============================================================================= @@ -389,21 +392,21 @@ TEST_CASE("data_batch accessor get_current_tier", "[data_batch]") { { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); - auto ro = batch->to_read_only(); - REQUIRE(ro.get_current_tier() == memory::Tier::GPU); + auto batch = data_batch::make(1, std::move(data)); + auto ro = batch->get_read_only(); + REQUIRE(ro->get_current_tier() == memory::Tier::GPU); } { auto data = std::make_unique(memory::Tier::HOST, 1024); - auto batch = std::make_shared(2, std::move(data)); - auto ro = batch->to_read_only(); - REQUIRE(ro.get_current_tier() == memory::Tier::HOST); + auto batch = data_batch::make(2, std::move(data)); + auto ro = batch->get_read_only(); + REQUIRE(ro->get_current_tier() == memory::Tier::HOST); } { auto data = std::make_unique(memory::Tier::DISK, 1024); - auto batch = std::make_shared(3, std::move(data)); - auto ro = batch->to_read_only(); - REQUIRE(ro.get_current_tier() == memory::Tier::DISK); + auto batch = data_batch::make(3, std::move(data)); + auto ro = batch->get_read_only(); + REQUIRE(ro->get_current_tier() == memory::Tier::DISK); } } @@ -417,7 +420,7 @@ TEST_CASE("data_batch unique IDs", "[data_batch]") for (auto id : batch_ids) { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(id, std::move(data)); + auto batch = data_batch::make(id, std::move(data)); REQUIRE(batch->get_batch_id() == id); } } @@ -429,7 +432,7 @@ TEST_CASE("data_batch unique IDs", "[data_batch]") TEST_CASE("data_batch thread-safe concurrent readonly", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); constexpr int num_threads = 10; constexpr int reads_per_thread = 100; @@ -438,8 +441,8 @@ TEST_CASE("data_batch thread-safe concurrent readonly", "[data_batch]") for (int i = 0; i < num_threads; ++i) { threads.emplace_back([&batch]() { for (int j = 0; j < reads_per_thread; ++j) { - auto ro = batch->to_read_only(); - REQUIRE(ro.get_batch_id() == 1); + auto ro = batch->get_read_only(); + REQUIRE(ro->get_batch_id() == 1); } }); } @@ -452,7 +455,7 @@ TEST_CASE("data_batch thread-safe concurrent readonly", "[data_batch]") TEST_CASE("data_batch thread-safe mutable access serialized", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); constexpr int num_threads = 10; std::atomic concurrent_writers{0}; @@ -462,7 +465,7 @@ TEST_CASE("data_batch thread-safe mutable access serialized", "[data_batch]") for (int i = 0; i < num_threads; ++i) { threads.emplace_back([&batch, &concurrent_writers, &saw_concurrent]() { for (int j = 0; j < 10; ++j) { - auto rw = batch->to_mutable(); + auto rw = batch->get_mutable(); int count = concurrent_writers.fetch_add(1); if (count > 0) { saw_concurrent.store(true); } std::this_thread::sleep_for(std::chrono::microseconds(1)); @@ -485,35 +488,35 @@ TEST_CASE("data_batch thread-safe mutable access serialized", "[data_batch]") TEST_CASE("data_batch clone creates independent copy", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 2048); - auto batch = std::make_shared(42, std::move(data)); + auto batch = data_batch::make(42, std::move(data)); - auto ro = batch->to_read_only(); - auto cloned = ro.clone(100, rmm::cuda_stream_view{}); + auto ro = batch->get_read_only(); + auto cloned = batch->clone(100, rmm::cuda_stream_view{}); REQUIRE(cloned != nullptr); REQUIRE(cloned->get_batch_id() == 100); REQUIRE(cloned->get_subscriber_count() == 0); - REQUIRE(ro.get_batch_id() == 42); + REQUIRE(ro->get_batch_id() == 42); - auto ro_clone = cloned->to_read_only(); - REQUIRE(ro_clone.get_data()->get_size_in_bytes() == ro.get_data()->get_size_in_bytes()); - REQUIRE(ro_clone.get_data() != ro.get_data()); + auto ro_clone = cloned->get_read_only(); + REQUIRE(ro_clone->get_data()->get_size_in_bytes() == ro->get_data()->get_size_in_bytes()); + REQUIRE(ro_clone->get_data() != ro->get_data()); } TEST_CASE("data_batch clone with different batch IDs", "[data_batch]") { auto data = std::make_unique(memory::Tier::HOST, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); - auto ro = batch->to_read_only(); + auto ro = batch->get_read_only(); - auto clone1 = ro.clone(1, rmm::cuda_stream_view{}); + auto clone1 = batch->clone(1, rmm::cuda_stream_view{}); REQUIRE(clone1->get_batch_id() == 1); - auto clone2 = ro.clone(0, rmm::cuda_stream_view{}); + auto clone2 = batch->clone(0, rmm::cuda_stream_view{}); REQUIRE(clone2->get_batch_id() == 0); - auto clone3 = ro.clone(UINT64_MAX, rmm::cuda_stream_view{}); + auto clone3 = batch->clone(UINT64_MAX, rmm::cuda_stream_view{}); REQUIRE(clone3->get_batch_id() == UINT64_MAX); } @@ -522,29 +525,29 @@ TEST_CASE("data_batch clone preserves tier information", "[data_batch]") SECTION("GPU tier") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); - auto ro = batch->to_read_only(); - auto cloned = ro.clone(2, rmm::cuda_stream_view{}); - auto ro_cl = cloned->to_read_only(); - REQUIRE(ro_cl.get_current_tier() == memory::Tier::GPU); + auto batch = data_batch::make(1, std::move(data)); + auto ro = batch->get_read_only(); + auto cloned = batch->clone(2, rmm::cuda_stream_view{}); + auto ro_cl = cloned->get_read_only(); + REQUIRE(ro_cl->get_current_tier() == memory::Tier::GPU); } SECTION("HOST tier") { auto data = std::make_unique(memory::Tier::HOST, 1024); - auto batch = std::make_shared(1, std::move(data)); - auto ro = batch->to_read_only(); - auto cloned = ro.clone(2, rmm::cuda_stream_view{}); - auto ro_cl = cloned->to_read_only(); - REQUIRE(ro_cl.get_current_tier() == memory::Tier::HOST); + auto batch = data_batch::make(1, std::move(data)); + auto ro = batch->get_read_only(); + auto cloned = batch->clone(2, rmm::cuda_stream_view{}); + auto ro_cl = cloned->get_read_only(); + REQUIRE(ro_cl->get_current_tier() == memory::Tier::HOST); } SECTION("DISK tier") { auto data = std::make_unique(memory::Tier::DISK, 1024); - auto batch = std::make_shared(1, std::move(data)); - auto ro = batch->to_read_only(); - auto cloned = ro.clone(2, rmm::cuda_stream_view{}); - auto ro_cl = cloned->to_read_only(); - REQUIRE(ro_cl.get_current_tier() == memory::Tier::DISK); + auto batch = data_batch::make(1, std::move(data)); + auto ro = batch->get_read_only(); + auto cloned = batch->clone(2, rmm::cuda_stream_view{}); + auto ro_cl = cloned->get_read_only(); + REQUIRE(ro_cl->get_current_tier() == memory::Tier::DISK); } } @@ -563,17 +566,17 @@ TEST_CASE("data_batch clone with real GPU data verifies data integrity", "[data_ auto gpu_repr = std::make_unique( std::make_unique(std::move(table)), *gpu_space); - auto batch = std::make_shared(1, std::move(gpu_repr)); + auto batch = data_batch::make(1, std::move(gpu_repr)); - auto ro = batch->to_read_only(); - auto cloned = ro.clone(2, stream.view()); + auto ro = batch->get_read_only(); + auto cloned = batch->clone(2, stream.view()); REQUIRE(cloned != nullptr); REQUIRE(cloned->get_batch_id() == 2); - auto ro_clone = cloned->to_read_only(); + auto ro_clone = cloned->get_read_only(); - auto* original_repr = dynamic_cast(ro.get_data()); - auto* cloned_repr = dynamic_cast(ro_clone.get_data()); + auto* original_repr = dynamic_cast(ro->get_data()); + auto* cloned_repr = dynamic_cast(ro_clone->get_data()); REQUIRE(original_repr != nullptr); REQUIRE(cloned_repr != nullptr); @@ -594,15 +597,15 @@ TEST_CASE("data_batch clone creates independent memory copies", "[data_batch][gp auto table = create_simple_cudf_table(50, 2, gpu_space->get_default_allocator(), stream.view()); auto gpu_repr = std::make_unique( std::make_unique(std::move(table)), *gpu_space); - auto batch = std::make_shared(1, std::move(gpu_repr)); + auto batch = data_batch::make(1, std::move(gpu_repr)); - auto ro = batch->to_read_only(); - auto cloned = ro.clone(2, stream.view()); + auto ro = batch->get_read_only(); + auto cloned = batch->clone(2, stream.view()); - auto ro_clone = cloned->to_read_only(); + auto ro_clone = cloned->get_read_only(); - auto* original_repr = dynamic_cast(ro.get_data()); - auto* cloned_repr = dynamic_cast(ro_clone.get_data()); + auto* original_repr = dynamic_cast(ro->get_data()); + auto* cloned_repr = dynamic_cast(ro_clone->get_data()); // Verify each column points to different memory for (cudf::size_type i = 0; i < original_repr->get_table_view().num_columns(); ++i) { @@ -619,26 +622,26 @@ TEST_CASE("data_batch multiple clones are all independent", "[data_batch][gpu]") auto table = create_simple_cudf_table(30, 2, gpu_space->get_default_allocator(), stream.view()); auto gpu_repr = std::make_unique( std::make_unique(std::move(table)), *gpu_space); - auto batch = std::make_shared(1, std::move(gpu_repr)); + auto batch = data_batch::make(1, std::move(gpu_repr)); // Clone 3 times from the same read_only accessor (clone does not consume the accessor) - auto ro = batch->to_read_only(); - auto clone1 = ro.clone(10, stream.view()); - auto clone2 = ro.clone(20, stream.view()); - auto clone3 = ro.clone(30, stream.view()); + auto ro = batch->get_read_only(); + auto clone1 = batch->clone(10, stream.view()); + auto clone2 = batch->clone(20, stream.view()); + auto clone3 = batch->clone(30, stream.view()); REQUIRE(clone1->get_batch_id() == 10); REQUIRE(clone2->get_batch_id() == 20); REQUIRE(clone3->get_batch_id() == 30); - auto ro_c1 = clone1->to_read_only(); - auto ro_c2 = clone2->to_read_only(); - auto ro_c3 = clone3->to_read_only(); + auto ro_c1 = clone1->get_read_only(); + auto ro_c2 = clone2->get_read_only(); + auto ro_c3 = clone3->get_read_only(); - auto* original_repr = dynamic_cast(ro.get_data()); - auto* clone1_repr = dynamic_cast(ro_c1.get_data()); - auto* clone2_repr = dynamic_cast(ro_c2.get_data()); - auto* clone3_repr = dynamic_cast(ro_c3.get_data()); + auto* original_repr = dynamic_cast(ro->get_data()); + auto* clone1_repr = dynamic_cast(ro_c1->get_data()); + auto* clone2_repr = dynamic_cast(ro_c2->get_data()); + auto* clone3_repr = dynamic_cast(ro_c3->get_data()); stream.synchronize(); expect_cudf_tables_equal_on_stream( @@ -657,14 +660,14 @@ TEST_CASE("data_batch clone with empty table", "[data_batch][gpu]") auto table = create_simple_cudf_table(0, 2, gpu_space->get_default_allocator(), stream.view()); auto gpu_repr = std::make_unique( std::make_unique(std::move(table)), *gpu_space); - auto batch = std::make_shared(1, std::move(gpu_repr)); + auto batch = data_batch::make(1, std::move(gpu_repr)); - auto ro = batch->to_read_only(); - auto cloned = ro.clone(2, stream.view()); + auto ro = batch->get_read_only(); + auto cloned = batch->clone(2, stream.view()); REQUIRE(cloned != nullptr); - auto ro_clone = cloned->to_read_only(); - auto* cloned_repr = dynamic_cast(ro_clone.get_data()); + auto ro_clone = cloned->get_read_only(); + auto* cloned_repr = dynamic_cast(ro_clone->get_data()); REQUIRE(cloned_repr != nullptr); REQUIRE(cloned_repr->get_table_view().num_rows() == 0); REQUIRE(cloned_repr->get_table_view().num_columns() == 2); @@ -679,16 +682,16 @@ TEST_CASE("data_batch clone with large table", "[data_batch][gpu]") create_simple_cudf_table(10000, 2, gpu_space->get_default_allocator(), stream.view()); auto gpu_repr = std::make_unique( std::make_unique(std::move(table)), *gpu_space); - auto batch = std::make_shared(1, std::move(gpu_repr)); + auto batch = data_batch::make(1, std::move(gpu_repr)); - auto ro = batch->to_read_only(); - auto cloned = ro.clone(2, stream.view()); + auto ro = batch->get_read_only(); + auto cloned = batch->clone(2, stream.view()); REQUIRE(cloned != nullptr); - auto ro_clone = cloned->to_read_only(); + auto ro_clone = cloned->get_read_only(); - auto* original_repr = dynamic_cast(ro.get_data()); - auto* cloned_repr = dynamic_cast(ro_clone.get_data()); + auto* original_repr = dynamic_cast(ro->get_data()); + auto* cloned_repr = dynamic_cast(ro_clone->get_data()); // Verify structure REQUIRE(cloned_repr->get_table_view().num_rows() == 10000); @@ -711,44 +714,44 @@ TEST_CASE("data_batch clone with large table", "[data_batch][gpu]") TEST_CASE("data_batch initial state is idle", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); REQUIRE(batch->get_state() == batch_state::idle); } TEST_CASE("data_batch state transitions", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); SECTION("idle -> read_only -> idle") { - auto ro = batch->to_read_only(); - auto idle = data_batch::to_idle(std::move(ro)); + auto ro = batch->get_read_only(); + auto idle = read_only_data_batch::to_idle(std::move(ro)); REQUIRE(idle->get_state() == batch_state::idle); } SECTION("idle -> mutable_locked -> idle") { - auto mut = batch->to_mutable(); - auto idle = data_batch::to_idle(std::move(mut)); + auto mut = batch->get_mutable(); + auto idle = mutable_data_batch::to_idle(std::move(mut)); REQUIRE(idle->get_state() == batch_state::idle); } - SECTION("try_to_read_only updates state on success") + SECTION("try_get_read_only updates state on success") { - auto result = batch->try_to_read_only(); + auto result = batch->try_get_read_only(); REQUIRE(result.has_value()); - auto idle = data_batch::to_idle(std::move(*result)); + auto idle = read_only_data_batch::to_idle(std::move(*result)); REQUIRE(idle->get_state() == batch_state::idle); } - SECTION("try_to_mutable updates state on success") + SECTION("try_get_mutable updates state on success") { - auto result = batch->try_to_mutable(); + auto result = batch->try_get_mutable(); REQUIRE(result.has_value()); - auto idle = data_batch::to_idle(std::move(*result)); + auto idle = mutable_data_batch::to_idle(std::move(*result)); REQUIRE(idle->get_state() == batch_state::idle); } } @@ -757,59 +760,59 @@ TEST_CASE("data_batch state transitions", "[data_batch]") // Non-static transition tests (shared_from_this) // ============================================================================= -TEST_CASE("data_batch non-static to_read_only does not consume caller pointer", "[data_batch]") +TEST_CASE("data_batch get_read_only does not consume caller pointer", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); - auto accessor = batch->to_read_only(); + auto accessor = batch->get_read_only(); REQUIRE(batch != nullptr); REQUIRE(batch->get_batch_id() == 1); REQUIRE(batch->get_state() == batch_state::read_only); - REQUIRE(accessor.get_batch_id() == 1); + REQUIRE(accessor->get_batch_id() == 1); } -TEST_CASE("data_batch non-static to_mutable does not consume caller pointer", "[data_batch]") +TEST_CASE("data_batch get_mutable does not consume caller pointer", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); - auto accessor = batch->to_mutable(); + auto accessor = batch->get_mutable(); REQUIRE(batch != nullptr); REQUIRE(batch->get_batch_id() == 1); REQUIRE(batch->get_state() == batch_state::mutable_locked); - REQUIRE(accessor.get_batch_id() == 1); + REQUIRE(accessor->get_batch_id() == 1); } -TEST_CASE("data_batch non-static try_to_read_only", "[data_batch]") +TEST_CASE("data_batch try_get_read_only does not consume caller pointer", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); - auto result = batch->try_to_read_only(); + auto result = batch->try_get_read_only(); REQUIRE(result.has_value()); REQUIRE(batch != nullptr); REQUIRE(batch->get_state() == batch_state::read_only); } -TEST_CASE("data_batch non-static try_to_mutable", "[data_batch]") +TEST_CASE("data_batch try_get_mutable does not consume caller pointer", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); - auto result = batch->try_to_mutable(); + auto result = batch->try_get_mutable(); REQUIRE(result.has_value()); REQUIRE(batch != nullptr); REQUIRE(batch->get_state() == batch_state::mutable_locked); } -TEST_CASE("data_batch non-static try_to_mutable fails when read-locked", "[data_batch]") +TEST_CASE("data_batch try_get_mutable fails when read-locked", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); - auto ro = batch->to_read_only(); - auto result = batch->try_to_mutable(); + auto ro = batch->get_read_only(); + auto result = batch->try_get_mutable(); REQUIRE_FALSE(result.has_value()); } @@ -824,7 +827,9 @@ struct conversion_sync_observer { bool synced_before_destroy = false; conversion_sync_observer() - { CUCASCADE_CUDA_TRY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming)); } + { + CUCASCADE_CUDA_TRY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming)); + } ~conversion_sync_observer() { cudaEventDestroy(event); } conversion_sync_observer(const conversion_sync_observer&) = delete; @@ -846,14 +851,18 @@ class observed_gpu_representation : private cucascade::test::mock_memory_space_h } ~observed_gpu_representation() override - { _observer.synced_before_destroy = (cudaEventQuery(_observer.event) == cudaSuccess); } + { + _observer.synced_before_destroy = (cudaEventQuery(_observer.event) == cudaSuccess); + } void const* data() const { return _buf.data(); } std::size_t get_size_in_bytes() const override { return _buf.size(); } std::size_t get_uncompressed_data_size_in_bytes() const override { return _buf.size(); } std::unique_ptr clone( [[maybe_unused]] rmm::cuda_stream_view stream) override - { return nullptr; } + { + return nullptr; + } private: rmm::device_buffer _buf; @@ -896,10 +905,10 @@ TEST_CASE("convert_to synchronizes stream before destroying GPU source", "[data_ }); auto gpu_data = std::make_unique(std::move(gpu_buf), observer); - auto batch = std::make_shared(1, std::move(gpu_data)); + auto batch = data_batch::make(1, std::move(gpu_data)); { - auto mut = batch->to_mutable(); - mut.convert_to(registry, host_space.get(), stream.view()); + auto mut = batch->get_mutable(); + mut->convert_to(registry, host_space.get(), stream.view()); } // With the fix: convert_to synchronizes the stream before the old GPU @@ -940,14 +949,18 @@ class observed_host_representation : private cucascade::test::mock_memory_space_ } ~observed_host_representation() override - { _observer.synced_before_destroy = (cudaEventQuery(_observer.event) == cudaSuccess); } + { + _observer.synced_before_destroy = (cudaEventQuery(_observer.event) == cudaSuccess); + } void const* data() const { return _pinned_ptr; } std::size_t get_size_in_bytes() const override { return _size; } std::size_t get_uncompressed_data_size_in_bytes() const override { return _size; } std::unique_ptr clone( [[maybe_unused]] rmm::cuda_stream_view stream) override - { return nullptr; } + { + return nullptr; + } private: void* _pinned_ptr; @@ -989,10 +1002,10 @@ TEST_CASE("convert_to synchronizes stream before destroying HOST source when tar auto host_data = std::make_unique(pinned_host, buf_size, observer); auto gpu_space = make_mock_memory_space(memory::Tier::GPU, 0); - auto batch = std::make_shared(1, std::move(host_data)); + auto batch = data_batch::make(1, std::move(host_data)); { - auto mut = batch->to_mutable(); - mut.convert_to(registry, gpu_space.get(), stream.view()); + auto mut = batch->get_mutable(); + mut->convert_to(registry, gpu_space.get(), stream.view()); } // With the fix: convert_to synchronizes the stream before the old HOST @@ -1006,7 +1019,9 @@ TEST_CASE("convert_to synchronizes stream before destroying HOST source when tar // Host callback that blocks the CUDA stream for a fixed duration, used to // create a deterministic window during which stream.synchronize() is blocked. static void CUDART_CB stream_delay_callback(void* /*userData*/) -{ std::this_thread::sleep_for(std::chrono::milliseconds(50)); } +{ + std::this_thread::sleep_for(std::chrono::milliseconds(50)); +} TEST_CASE("mutable_data_batch holds exclusive lock during convert_to stream sync", "[data_batch][convert_to]") @@ -1047,11 +1062,11 @@ TEST_CASE("mutable_data_batch holds exclusive lock during convert_to stream sync }); auto gpu_data = std::make_unique(std::move(gpu_buf), observer); - auto batch = std::make_shared(1, std::move(gpu_data)); + auto batch = data_batch::make(1, std::move(gpu_data)); std::thread convert_thread([&]() { - auto mut = batch->to_mutable(); - mut.convert_to(registry, host_space.get(), stream.view()); + auto mut = batch->get_mutable(); + mut->convert_to(registry, host_space.get(), stream.view()); }); // Spin until the converter function has returned — convert_to is now blocked @@ -1064,18 +1079,18 @@ TEST_CASE("mutable_data_batch holds exclusive lock during convert_to stream sync std::this_thread::sleep_for(std::chrono::microseconds(500)); // mutable_data_batch holds _rw_mutex exclusively for its entire lifetime, - // including during stream.synchronize(). try_to_mutable() must fail. - auto try_result = batch->try_to_mutable(); + // including during stream.synchronize(). try_get_mutable() must fail. + auto try_result = batch->try_get_mutable(); auto state_during_sync = batch->get_state(); - // Confirm the stream work was still in progress when we called try_to_mutable. + // Confirm the stream work was still in progress when we called try_get_mutable. // cudaErrorNotReady means the event (recorded after the 50 ms callback) hasn't // completed yet, proving we polled DURING the sync window. bool accessed_during_sync = (cudaEventQuery(observer.event) == cudaErrorNotReady); convert_thread.join(); - // Exclusive lock must have been held — try_to_mutable returned nullopt. + // Exclusive lock must have been held -- try_get_mutable returned nullopt. REQUIRE(!try_result.has_value()); // State must have been mutable_locked while the exclusive lock was held. REQUIRE(state_during_sync == batch_state::mutable_locked); @@ -1087,50 +1102,6 @@ TEST_CASE("mutable_data_batch holds exclusive lock during convert_to stream sync CUCASCADE_CUDA_TRY(cudaFreeHost(pinned_host)); } -// ============================================================================= -// Locked-to-locked transition tests -// ============================================================================= - -TEST_CASE("data_batch readonly_to_mutable", "[data_batch]") -{ - auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); - - auto ro = batch->to_read_only(); - auto mut = data_batch::readonly_to_mutable(std::move(ro)); - REQUIRE(mut.get_batch_id() == 1); - - auto idle = data_batch::to_idle(std::move(mut)); - REQUIRE(idle->get_state() == batch_state::idle); -} - -TEST_CASE("data_batch mutable_to_readonly", "[data_batch]") -{ - auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); - - auto mut = batch->to_mutable(); - auto ro = data_batch::mutable_to_readonly(std::move(mut)); - REQUIRE(ro.get_batch_id() == 1); - - auto idle = data_batch::to_idle(std::move(ro)); - REQUIRE(idle->get_state() == batch_state::idle); -} - -TEST_CASE("data_batch full cycle: idle -> ro -> mutable -> ro -> idle", "[data_batch]") -{ - auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); - - auto ro1 = batch->to_read_only(); - auto mut = data_batch::readonly_to_mutable(std::move(ro1)); - auto ro2 = data_batch::mutable_to_readonly(std::move(mut)); - auto idle = data_batch::to_idle(std::move(ro2)); - - REQUIRE(idle->get_state() == batch_state::idle); - REQUIRE(idle->get_batch_id() == 1); -} - // ============================================================================= // RAII lifecycle tests: _read_only_count tracking and destructor state transitions // ============================================================================= @@ -1138,24 +1109,24 @@ TEST_CASE("data_batch full cycle: idle -> ro -> mutable -> ro -> idle", "[data_b TEST_CASE("data_batch read_only_count tracks concurrent readers", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); REQUIRE(batch->get_read_only_count() == 0); // Create first reader - auto ro1 = batch->to_read_only(); + auto ro1 = batch->get_read_only(); REQUIRE(batch->get_read_only_count() == 1); // Create second reader - auto ro2 = batch->to_read_only(); + auto ro2 = batch->get_read_only(); REQUIRE(batch->get_read_only_count() == 2); // Create third reader - auto ro3 = batch->to_read_only(); + auto ro3 = batch->get_read_only(); REQUIRE(batch->get_read_only_count() == 3); // Drop one reader via to_idle - auto idle = data_batch::to_idle(std::move(ro1)); + auto idle = read_only_data_batch::to_idle(std::move(ro1)); REQUIRE(batch->get_read_only_count() == 2); // Drop remaining readers via destructor (scope exit) @@ -1176,10 +1147,10 @@ TEST_CASE("data_batch read_only_count tracks concurrent readers", "[data_batch]" TEST_CASE("data_batch destructor transitions state to idle for read_only", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); { - auto ro = batch->to_read_only(); + auto ro = batch->get_read_only(); REQUIRE(batch->get_state() == batch_state::read_only); // ro destructor fires here } @@ -1189,10 +1160,10 @@ TEST_CASE("data_batch destructor transitions state to idle for read_only", "[dat TEST_CASE("data_batch destructor transitions state to idle for mutable", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); { - auto mut = batch->to_mutable(); + auto mut = batch->get_mutable(); REQUIRE(batch->get_state() == batch_state::mutable_locked); // mut destructor fires here } @@ -1202,7 +1173,7 @@ TEST_CASE("data_batch destructor transitions state to idle for mutable", "[data_ TEST_CASE("data_batch concurrent lifecycle: readers then mutable then readers", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); // Track event ordering std::vector events; @@ -1213,7 +1184,7 @@ TEST_CASE("data_batch concurrent lifecycle: readers then mutable then readers", }; // Phase 1: Create initial read_only on main thread - auto ro_initial = batch->to_read_only(); + auto ro_initial = batch->get_read_only(); REQUIRE(batch->get_read_only_count() == 1); std::atomic thread1_readers_created{false}; @@ -1224,8 +1195,8 @@ TEST_CASE("data_batch concurrent lifecycle: readers then mutable then readers", // Thread 1: create 2 more read_only, then release all 3, then create 2 more after mutable done std::thread t1([&]() { // Create 2 more readers - auto ro_t1_a = batch->to_read_only(); - auto ro_t1_b = batch->to_read_only(); + auto ro_t1_a = batch->get_read_only(); + auto ro_t1_b = batch->get_read_only(); log_event("t1: 3 readers active"); REQUIRE(batch->get_read_only_count() == 3); thread1_readers_created.store(true); @@ -1251,12 +1222,12 @@ TEST_CASE("data_batch concurrent lifecycle: readers then mutable then readers", } // Create 2 new readers after mutable is done - auto ro_new_a = batch->to_read_only(); - auto ro_new_b = batch->to_read_only(); + auto ro_new_a = batch->get_read_only(); + auto ro_new_b = batch->get_read_only(); log_event("t1: 2 new readers after mutable"); REQUIRE(batch->get_read_only_count() == 2); - REQUIRE(ro_new_a.get_batch_id() == 1); - REQUIRE(ro_new_b.get_batch_id() == 1); + REQUIRE(ro_new_a->get_batch_id() == 1); + REQUIRE(ro_new_b->get_batch_id() == 1); // Let them go out of scope — destructors clean up }); @@ -1269,13 +1240,13 @@ TEST_CASE("data_batch concurrent lifecycle: readers then mutable then readers", // This will block until all read_only locks are released log_event("t2: requesting mutable"); - auto mut = batch->to_mutable(); + auto mut = batch->get_mutable(); log_event("t2: mutable acquired"); thread2_mutable_acquired.store(true); REQUIRE(batch->get_state() == batch_state::mutable_locked); REQUIRE(batch->get_read_only_count() == 0); - REQUIRE(mut.get_batch_id() == 1); + REQUIRE(mut->get_batch_id() == 1); // Hold mutable briefly std::this_thread::sleep_for(std::chrono::milliseconds(20)); @@ -1321,9 +1292,9 @@ TEST_CASE("data_batch concurrent lifecycle: readers then mutable then readers", TEST_CASE("data_batch move does not change read_only_count", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); - auto ro1 = batch->to_read_only(); + auto ro1 = batch->get_read_only(); REQUIRE(batch->get_read_only_count() == 1); // Move should not change count — ownership transferred, not a new reader @@ -1335,211 +1306,56 @@ TEST_CASE("data_batch move does not change read_only_count", "[data_batch]") } // ============================================================================= -// read_only_data_batch copy semantics tests +// read_only_data_batch clone access tests // ============================================================================= -TEST_CASE("read_only_data_batch is copyable", "[data_batch]") +TEST_CASE("read_only_data_batch is move-only", "[data_batch]") { - static_assert(std::is_copy_constructible_v); - static_assert(std::is_copy_assignable_v); + static_assert(!std::is_copy_constructible_v); + static_assert(!std::is_copy_assignable_v); + static_assert(std::is_move_constructible_v); + static_assert(std::is_move_assignable_v); } -TEST_CASE("read_only_data_batch copy constructor acquires new shared lock", "[data_batch]") +TEST_CASE("read_only_data_batch clone_read_only_access acquires new shared lock", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); - auto ro1 = batch->to_read_only(); + auto ro1 = batch->get_read_only(); REQUIRE(batch->get_read_only_count() == 1); - auto ro2 = ro1; // NOLINT(performance-unnecessary-copy-initialization) + auto ro2 = ro1.clone_read_only_access(); REQUIRE(batch->get_read_only_count() == 2); - REQUIRE(ro1.get_batch_id() == 1); - REQUIRE(ro2.get_batch_id() == 1); - REQUIRE(ro1.get_current_tier() == memory::Tier::GPU); - REQUIRE(ro2.get_current_tier() == memory::Tier::GPU); - REQUIRE(ro1.get_data() == ro2.get_data()); + REQUIRE(ro1->get_batch_id() == 1); + REQUIRE(ro2->get_batch_id() == 1); + REQUIRE(ro1->get_current_tier() == memory::Tier::GPU); + REQUIRE(ro2->get_current_tier() == memory::Tier::GPU); + REQUIRE(ro1->get_data() == ro2->get_data()); } -TEST_CASE("read_only_data_batch copy destructor decrements count", "[data_batch]") +TEST_CASE("read_only_data_batch cloned access blocks mutable access", "[data_batch]") { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); - auto ro1 = batch->to_read_only(); - REQUIRE(batch->get_read_only_count() == 1); + auto ro = batch->get_read_only(); + auto clone = ro.clone_read_only_access(); - { - auto ro2 = ro1; // NOLINT(performance-unnecessary-copy-initialization) - REQUIRE(batch->get_read_only_count() == 2); - } - REQUIRE(batch->get_read_only_count() == 1); - REQUIRE(batch->get_state() == batch_state::read_only); -} - -TEST_CASE("read_only_data_batch copy outlives original", "[data_batch]") -{ - auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); - - std::optional copy; - { - auto ro = batch->to_read_only(); - copy.emplace(ro); - REQUIRE(batch->get_read_only_count() == 2); - } - REQUIRE(batch->get_read_only_count() == 1); - REQUIRE(batch->get_state() == batch_state::read_only); - REQUIRE(copy->get_batch_id() == 1); - REQUIRE(copy->get_current_tier() == memory::Tier::GPU); - - copy.reset(); - REQUIRE(batch->get_read_only_count() == 0); - REQUIRE(batch->get_state() == batch_state::idle); -} - -TEST_CASE("read_only_data_batch multiple copies all independent", "[data_batch]") -{ - auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); - - auto ro1 = batch->to_read_only(); - auto ro2 = ro1; // NOLINT(performance-unnecessary-copy-initialization) - auto ro3 = ro2; // NOLINT(performance-unnecessary-copy-initialization) - auto ro4 = ro1; // NOLINT(performance-unnecessary-copy-initialization) - REQUIRE(batch->get_read_only_count() == 4); - - { - auto temp = std::move(ro2); - } - REQUIRE(batch->get_read_only_count() == 3); - - { - auto temp = std::move(ro3); - } - REQUIRE(batch->get_read_only_count() == 2); - - REQUIRE(ro1.get_batch_id() == 1); - REQUIRE(ro4.get_batch_id() == 1); -} - -TEST_CASE("read_only_data_batch copy assignment replaces existing lock", "[data_batch]") -{ - auto data1 = std::make_unique(memory::Tier::GPU, 1024); - auto batch1 = std::make_shared(1, std::move(data1)); - - auto data2 = std::make_unique(memory::Tier::HOST, 2048); - auto batch2 = std::make_shared(2, std::move(data2)); - - auto ro1 = batch1->to_read_only(); - auto ro2 = batch2->to_read_only(); - REQUIRE(batch1->get_read_only_count() == 1); - REQUIRE(batch2->get_read_only_count() == 1); - - ro1 = ro2; - REQUIRE(batch1->get_read_only_count() == 0); - REQUIRE(batch1->get_state() == batch_state::idle); - REQUIRE(batch2->get_read_only_count() == 2); - REQUIRE(ro1.get_batch_id() == 2); - REQUIRE(ro1.get_current_tier() == memory::Tier::HOST); -} - -TEST_CASE("read_only_data_batch copy self-assignment is safe", "[data_batch]") -{ - auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); - - auto ro = batch->to_read_only(); - REQUIRE(batch->get_read_only_count() == 1); - - ro = ro; - REQUIRE(batch->get_read_only_count() == 1); - REQUIRE(batch->get_state() == batch_state::read_only); - REQUIRE(ro.get_batch_id() == 1); -} - -TEST_CASE("read_only_data_batch copy blocks mutable access", "[data_batch]") -{ - auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); - - auto ro = batch->to_read_only(); - auto copy = ro; // NOLINT(performance-unnecessary-copy-initialization) - - // Destroy original — copy still holds shared lock { auto temp = std::move(ro); } REQUIRE(batch->get_read_only_count() == 1); - // Mutable should still be blocked by the copy's shared lock - std::atomic got_lock{false}; - std::thread t([&batch, &got_lock]() { - auto result = batch->try_to_mutable(); - got_lock.store(result.has_value()); - }); - t.join(); - REQUIRE(got_lock.load() == false); -} - -TEST_CASE("read_only_data_batch last copy destruction transitions to idle", "[data_batch]") -{ - auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto result = batch->try_get_mutable(); + REQUIRE_FALSE(result.has_value()); { - auto ro1 = batch->to_read_only(); - auto ro2 = ro1; // NOLINT(performance-unnecessary-copy-initialization) - auto ro3 = ro2; // NOLINT(performance-unnecessary-copy-initialization) - REQUIRE(batch->get_read_only_count() == 3); - REQUIRE(batch->get_state() == batch_state::read_only); + auto temp = std::move(clone); } REQUIRE(batch->get_read_only_count() == 0); - REQUIRE(batch->get_state() == batch_state::idle); -} - -TEST_CASE("read_only_data_batch concurrent copies thread safety", "[data_batch]") -{ - auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); - - auto ro = batch->to_read_only(); - - constexpr int num_threads = 10; - constexpr int copies_per_thread = 50; - - std::vector threads; - for (int i = 0; i < num_threads; ++i) { - threads.emplace_back([&ro]() { - for (int j = 0; j < copies_per_thread; ++j) { - auto copy = ro; // NOLINT(performance-unnecessary-copy-initialization) - REQUIRE(copy.get_batch_id() == 1); - } - }); - } - - for (auto& t : threads) { - t.join(); - } - - // Only the original should remain - REQUIRE(batch->get_read_only_count() == 1); -} - -TEST_CASE("read_only_data_batch copy then mutable after all copies released", "[data_batch]") -{ - auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); - - { - auto ro1 = batch->to_read_only(); - auto ro2 = ro1; // NOLINT(performance-unnecessary-copy-initialization) - auto ro3 = ro1; // NOLINT(performance-unnecessary-copy-initialization) - REQUIRE(batch->get_read_only_count() == 3); - } - // All copies destroyed — mutable should succeed - auto mut = batch->to_mutable(); + auto mut = batch->get_mutable(); REQUIRE(batch->get_state() == batch_state::mutable_locked); - REQUIRE(mut.get_batch_id() == 1); + REQUIRE(mut->get_batch_id() == 1); } diff --git a/test/data/test_data_repository.cpp b/test/data/test_data_repository.cpp index 165d75e..8bffef5 100644 --- a/test/data/test_data_repository.cpp +++ b/test/data/test_data_repository.cpp @@ -66,7 +66,7 @@ TEST_CASE("shared_data_repository FIFO Order", "[data_repository]") for (uint64_t i = 1; i <= 5; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(i, std::move(data)); + auto batch = data_batch::make(i, std::move(data)); repository.add_data_batch(batch); } @@ -87,7 +87,7 @@ TEST_CASE("shared_data_repository Same Batch Multiple Repositories", "[data_repo shared_data_repository repo3; auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(42, std::move(data)); + auto batch = data_batch::make(42, std::move(data)); repo1.add_data_batch(batch); repo2.add_data_batch(batch); @@ -133,7 +133,7 @@ TEST_CASE("shared_data_repository Thread-Safe Adding", "[data_repository]") for (int j = 0; j < batches_per_thread; ++j) { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = i * batches_per_thread + j; - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = data_batch::make(batch_id, std::move(data)); repository.add_data_batch(batch); } }); @@ -161,7 +161,7 @@ TEST_CASE("shared_data_repository Thread-Safe Pulling", "[data_repository]") for (int i = 0; i < num_batches; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(i, std::move(data)); + auto batch = data_batch::make(i, std::move(data)); repository.add_data_batch(batch); } @@ -204,7 +204,7 @@ TEST_CASE("shared_data_repository Thread-Safe Pulling with Multiple Partitions", for (int i = 0; i < num_batches; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(i, std::move(data)); + auto batch = data_batch::make(i, std::move(data)); repository.add_data_batch(batch, i % num_partitions); } @@ -243,265 +243,6 @@ TEST_CASE("shared_data_repository Thread-Safe Pulling with Multiple Partitions", REQUIRE(empty == nullptr); } -// ============================================================================= -// Tests for unique_ptr based repository -// ============================================================================= - -TEST_CASE("unique_data_repository Construction", "[data_repository]") -{ - unique_data_repository repository; - - auto batch = repository.pop_next_data_batch(); - REQUIRE(batch == nullptr); -} - -TEST_CASE("unique_data_repository Add and Pull Single Batch", "[data_repository]") -{ - unique_data_repository repository; - - auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_unique(1, std::move(data)); - - repository.add_data_batch(std::move(batch)); - - auto pulled_batch = repository.pop_next_data_batch(); - REQUIRE(pulled_batch != nullptr); - REQUIRE(pulled_batch->get_batch_id() == 1); - - auto empty = repository.pop_next_data_batch(); - REQUIRE(empty == nullptr); -} - -TEST_CASE("unique_data_repository FIFO Order", "[data_repository]") -{ - unique_data_repository repository; - - for (uint64_t i = 1; i <= 5; ++i) { - auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_unique(i, std::move(data)); - repository.add_data_batch(std::move(batch)); - } - - for (uint64_t i = 1; i <= 5; ++i) { - auto pulled_batch = repository.pop_next_data_batch(); - REQUIRE(pulled_batch != nullptr); - REQUIRE(pulled_batch->get_batch_id() == i); - } - - auto empty = repository.pop_next_data_batch(); - REQUIRE(empty == nullptr); -} - -TEST_CASE("unique_data_repository Large Number of Batches", "[data_repository]") -{ - unique_data_repository repository; - - constexpr int num_batches = 1000; - - for (int i = 0; i < num_batches; ++i) { - auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_unique(i, std::move(data)); - repository.add_data_batch(std::move(batch)); - } - - int count = 0; - while (true) { - auto batch = repository.pop_next_data_batch(); - if (!batch) break; - ++count; - } - - REQUIRE(count == num_batches); -} - -TEST_CASE("unique_data_repository Interleaved Add and Pull", "[data_repository]") -{ - unique_data_repository repository; - - for (int cycle = 0; cycle < 50; ++cycle) { - for (int i = 0; i < 3; ++i) { - auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_unique(cycle * 3 + i, std::move(data)); - repository.add_data_batch(std::move(batch)); - } - - auto batch = repository.pop_next_data_batch(); - REQUIRE(batch != nullptr); - } - - int remaining = 0; - while (true) { - auto batch = repository.pop_next_data_batch(); - if (!batch) break; - ++remaining; - } - - // Should have 50 cycles * 3 adds - 50 pulls = 100 remaining - REQUIRE(remaining == 100); -} - -TEST_CASE("unique_data_repository Thread-Safe Adding", "[data_repository]") -{ - unique_data_repository repository; - - constexpr int num_threads = 10; - constexpr int batches_per_thread = 50; - - std::vector threads; - - for (int i = 0; i < num_threads; ++i) { - threads.emplace_back([&, i]() { - for (int j = 0; j < batches_per_thread; ++j) { - auto data = std::make_unique(memory::Tier::GPU, 1024); - uint64_t batch_id = i * batches_per_thread + j; - auto batch = std::make_unique(batch_id, std::move(data)); - repository.add_data_batch(std::move(batch)); - } - }); - } - - for (auto& thread : threads) { - thread.join(); - } - - int count = 0; - while (true) { - auto batch = repository.pop_next_data_batch(); - if (!batch) break; - ++count; - } - - REQUIRE(count == num_threads * batches_per_thread); -} - -TEST_CASE("unique_data_repository Thread-Safe Pulling", "[data_repository]") -{ - unique_data_repository repository; - - constexpr int num_batches = 500; - - for (int i = 0; i < num_batches; ++i) { - auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_unique(i, std::move(data)); - repository.add_data_batch(std::move(batch)); - } - - constexpr int num_threads = 10; - std::vector threads; - std::vector thread_counts(num_threads, 0); - - for (int i = 0; i < num_threads; ++i) { - threads.emplace_back([&, i]() { - while (true) { - auto batch = repository.pop_next_data_batch(); - if (!batch) break; - ++thread_counts[i]; - } - }); - } - - for (auto& thread : threads) { - thread.join(); - } - - int total_count = 0; - for (int count : thread_counts) { - total_count += count; - } - - REQUIRE(total_count == num_batches); - - auto empty = repository.pop_next_data_batch(); - REQUIRE(empty == nullptr); -} - -TEST_CASE("unique_data_repository Concurrent Add and Pull", "[data_repository]") -{ - unique_data_repository repository; - - constexpr int num_add_threads = 5; - constexpr int num_pull_threads = 5; - constexpr int batches_per_thread = 100; - - std::vector threads; - std::atomic pulled_count{0}; - - for (int i = 0; i < num_add_threads; ++i) { - threads.emplace_back([&, i]() { - for (int j = 0; j < batches_per_thread; ++j) { - auto data = std::make_unique(memory::Tier::GPU, 1024); - uint64_t batch_id = i * batches_per_thread + j; - auto batch = std::make_unique(batch_id, std::move(data)); - repository.add_data_batch(std::move(batch)); - - std::this_thread::sleep_for(std::chrono::microseconds(10)); - } - }); - } - - for (int i = 0; i < num_pull_threads; ++i) { - threads.emplace_back([&]() { - int local_count = 0; - while (local_count < batches_per_thread) { - auto batch = repository.pop_next_data_batch(); - if (batch) { - ++local_count; - ++pulled_count; - } else { - std::this_thread::yield(); - } - } - }); - } - - for (auto& thread : threads) { - thread.join(); - } - - REQUIRE(pulled_count == num_add_threads * batches_per_thread); -} - -TEST_CASE("unique_data_repository High Contention", "[data_repository]") -{ - unique_data_repository repository; - - constexpr int num_threads = 20; - constexpr int operations_per_thread = 50; - - std::vector threads; - std::atomic total_added{0}; - std::atomic total_pulled{0}; - - for (int i = 0; i < num_threads; ++i) { - threads.emplace_back([&, i]() { - for (int j = 0; j < operations_per_thread; ++j) { - auto data = std::make_unique(memory::Tier::GPU, 512); - uint64_t batch_id = i * operations_per_thread + j; - auto batch = std::make_unique(batch_id, std::move(data)); - repository.add_data_batch(std::move(batch)); - ++total_added; - - auto pulled = repository.pop_next_data_batch(); - if (pulled) { ++total_pulled; } - } - }); - } - - for (auto& thread : threads) { - thread.join(); - } - - REQUIRE(total_added == num_threads * operations_per_thread); - - while (true) { - auto batch = repository.pop_next_data_batch(); - if (!batch) break; - ++total_pulled; - } - - REQUIRE(total_pulled == total_added); -} - // ============================================================================= // Tests for size() // ============================================================================= @@ -519,14 +260,14 @@ TEST_CASE("shared_data_repository size After Adding", "[data_repository]") REQUIRE(repository.size() == 0); auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); repository.add_data_batch(batch); REQUIRE(repository.size() == 1); for (int i = 2; i <= 5; ++i) { auto data2 = std::make_unique(memory::Tier::GPU, 1024); - auto batch2 = std::make_shared(i, std::move(data2)); + auto batch2 = data_batch::make(i, std::move(data2)); repository.add_data_batch(batch2); } @@ -539,7 +280,7 @@ TEST_CASE("shared_data_repository size After Pulling", "[data_repository]") for (int i = 1; i <= 5; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(i, std::move(data)); + auto batch = data_batch::make(i, std::move(data)); repository.add_data_batch(batch); } @@ -565,7 +306,7 @@ TEST_CASE("shared_data_repository size Interleaved Operations", "[data_repositor for (int cycle = 0; cycle < 10; ++cycle) { for (int i = 0; i < 3; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(cycle * 3 + i, std::move(data)); + auto batch = data_batch::make(cycle * 3 + i, std::move(data)); repository.add_data_batch(batch); } @@ -585,85 +326,6 @@ TEST_CASE("shared_data_repository size Interleaved Operations", "[data_repositor REQUIRE(repository.size() == 0); } -TEST_CASE("unique_data_repository size Empty", "[data_repository]") -{ - unique_data_repository repository; - REQUIRE(repository.size() == 0); -} - -TEST_CASE("unique_data_repository size After Adding", "[data_repository]") -{ - unique_data_repository repository; - - REQUIRE(repository.size() == 0); - - auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_unique(1, std::move(data)); - repository.add_data_batch(std::move(batch)); - - REQUIRE(repository.size() == 1); - - for (int i = 2; i <= 5; ++i) { - auto data2 = std::make_unique(memory::Tier::GPU, 1024); - auto batch2 = std::make_unique(i, std::move(data2)); - repository.add_data_batch(std::move(batch2)); - } - - REQUIRE(repository.size() == 5); -} - -TEST_CASE("unique_data_repository size After Pulling", "[data_repository]") -{ - unique_data_repository repository; - - for (int i = 1; i <= 5; ++i) { - auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_unique(i, std::move(data)); - repository.add_data_batch(std::move(batch)); - } - - REQUIRE(repository.size() == 5); - - for (int i = 1; i <= 4; ++i) { - auto batch = repository.pop_next_data_batch(); - REQUIRE(batch != nullptr); - REQUIRE(repository.size() == (5 - i)); - } - - auto last_batch = repository.pop_next_data_batch(); - REQUIRE(last_batch != nullptr); - REQUIRE(repository.size() == 0); -} - -TEST_CASE("unique_data_repository size Interleaved Operations", "[data_repository]") -{ - unique_data_repository repository; - - REQUIRE(repository.size() == 0); - - for (int cycle = 0; cycle < 10; ++cycle) { - for (int i = 0; i < 3; ++i) { - auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_unique(cycle * 3 + i, std::move(data)); - repository.add_data_batch(std::move(batch)); - } - - REQUIRE(repository.size() == (cycle * 2 + 3)); - - auto batch = repository.pop_next_data_batch(); - REQUIRE(batch != nullptr); - - REQUIRE(repository.size() == (cycle * 2 + 2)); - } - - while (repository.size() > 0) { - auto batch = repository.pop_next_data_batch(); - REQUIRE(batch != nullptr); - } - - REQUIRE(repository.size() == 0); -} - TEST_CASE("shared_data_repository size Thread-Safe", "[data_repository]") { shared_data_repository repository; @@ -679,7 +341,7 @@ TEST_CASE("shared_data_repository size Thread-Safe", "[data_repository]") for (int j = 0; j < batches_per_thread; ++j) { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = i * batches_per_thread + j; - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = data_batch::make(batch_id, std::move(data)); repository.add_data_batch(batch); if (repository.size() > 0) { ++availability_check_count; } @@ -695,37 +357,6 @@ TEST_CASE("shared_data_repository size Thread-Safe", "[data_repository]") REQUIRE(repository.size() > 0); } -TEST_CASE("unique_data_repository size Thread-Safe", "[data_repository]") -{ - unique_data_repository repository; - - constexpr int num_threads = 10; - constexpr int batches_per_thread = 100; - - std::vector threads; - std::atomic availability_check_count{0}; - - for (int i = 0; i < num_threads; ++i) { - threads.emplace_back([&, i]() { - for (int j = 0; j < batches_per_thread; ++j) { - auto data = std::make_unique(memory::Tier::GPU, 1024); - uint64_t batch_id = i * batches_per_thread + j; - auto batch = std::make_unique(batch_id, std::move(data)); - repository.add_data_batch(std::move(batch)); - - if (repository.size() > 0) { ++availability_check_count; } - } - }); - } - - for (auto& thread : threads) { - thread.join(); - } - - REQUIRE(availability_check_count == num_threads * batches_per_thread); - REQUIRE(repository.size() > 0); -} - TEST_CASE("shared_data_repository size Concurrent Operations", "[data_repository]") { shared_data_repository repository; @@ -741,7 +372,7 @@ TEST_CASE("shared_data_repository size Concurrent Operations", "[data_repository for (int j = 0; j < operations; ++j) { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = i * operations + j; - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = data_batch::make(batch_id, std::move(data)); repository.add_data_batch(batch); std::this_thread::sleep_for(std::chrono::microseconds(10)); } @@ -769,12 +400,12 @@ TEST_CASE("shared_data_repository size Concurrent Operations", "[data_repository REQUIRE(repository.size() == 0); } -std::vector> create_test_batches(std::vector batch_ids) +std::vector> create_test_batches(std::vector batch_ids) { - std::vector> batches; + std::vector> batches; for (auto batch_id : batch_ids) { auto data = std::make_unique(memory::Tier::GPU, 1024); - batches.emplace_back(std::make_shared(batch_id, std::move(data))); + batches.emplace_back(data_batch::make(batch_id, std::move(data))); } return batches; } @@ -916,15 +547,6 @@ TEST_CASE("shared_data_repository using get_data_batch_by_id Multiple Partitions REQUIRE(repository.total_size() == 0); } -TEST_CASE("unique_data_repository throws an error when trying to get a batch by id", - "[data_repository]") -{ - unique_data_repository repository; - REQUIRE_THROWS_WITH(repository.get_data_batch_by_id(0, 0), - "get_data_batch_by_id is not supported for unique_ptr repositories. Use " - "pop_data_batch to move ownership instead."); -} - // ============================================================================= // Tests for pop_next_data_batch // ============================================================================= @@ -940,7 +562,7 @@ TEST_CASE("shared_data_repository pop_next_data_batch returns idle batch", "[dat shared_data_repository repository; auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(1, std::move(data)); + auto batch = data_batch::make(1, std::move(data)); repository.add_data_batch(batch); auto popped = repository.pop_next_data_batch(); @@ -954,8 +576,8 @@ TEST_CASE("shared_data_repository pop_next_data_batch returns read_only batch", shared_data_repository repository; auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(2, std::move(data)); - auto accessor = batch->to_read_only(); + auto batch = data_batch::make(2, std::move(data)); + auto accessor = batch->get_read_only(); repository.add_data_batch(batch); auto popped = repository.pop_next_data_batch(); @@ -968,8 +590,8 @@ TEST_CASE("shared_data_repository pop_next_data_batch returns mutable batch", "[ shared_data_repository repository; auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(3, std::move(data)); - auto accessor = batch->to_mutable(); + auto batch = data_batch::make(3, std::move(data)); + auto accessor = batch->get_mutable(); repository.add_data_batch(batch); auto popped = repository.pop_next_data_batch(); @@ -983,15 +605,15 @@ TEST_CASE("shared_data_repository pop_next_data_batch FIFO regardless of state", shared_data_repository repository; auto data1 = std::make_unique(memory::Tier::GPU, 1024); - auto batch1 = std::make_shared(1, std::move(data1)); + auto batch1 = data_batch::make(1, std::move(data1)); auto data2 = std::make_unique(memory::Tier::GPU, 1024); - auto batch2 = std::make_shared(2, std::move(data2)); + auto batch2 = data_batch::make(2, std::move(data2)); auto data3 = std::make_unique(memory::Tier::GPU, 1024); - auto batch3 = std::make_shared(3, std::move(data3)); + auto batch3 = data_batch::make(3, std::move(data3)); // batch1 read_only, batch2 mutable, batch3 idle — all three should come out in order - auto ro_accessor = batch1->to_read_only(); - auto mut_accessor = batch2->to_mutable(); + auto ro_accessor = batch1->get_read_only(); + auto mut_accessor = batch2->get_mutable(); repository.add_data_batch(batch1); repository.add_data_batch(batch2); @@ -1017,10 +639,10 @@ TEST_CASE("shared_data_repository pop_next_data_batch with partitions", "[data_r shared_data_repository repository; auto data1 = std::make_unique(memory::Tier::GPU, 1024); - auto batch1 = std::make_shared(1, std::move(data1)); + auto batch1 = data_batch::make(1, std::move(data1)); auto data2 = std::make_unique(memory::Tier::GPU, 1024); - auto batch2 = std::make_shared(2, std::move(data2)); - auto accessor = batch1->to_read_only(); + auto batch2 = data_batch::make(2, std::move(data2)); + auto accessor = batch1->get_read_only(); repository.add_data_batch(batch1, 0); repository.add_data_batch(batch2, 1); @@ -1036,33 +658,3 @@ TEST_CASE("shared_data_repository pop_next_data_batch with partitions", "[data_r REQUIRE(repository.pop_next_data_batch(0) == nullptr); REQUIRE(repository.pop_next_data_batch(1) == nullptr); } - -TEST_CASE("unique_data_repository pop_next_data_batch empty returns nullptr", "[data_repository]") -{ - unique_data_repository repository; - REQUIRE(repository.pop_next_data_batch() == nullptr); -} - -TEST_CASE("unique_data_repository pop_next_data_batch returns batch regardless of state", - "[data_repository]") -{ - unique_data_repository repository; - - auto data1 = std::make_unique(memory::Tier::GPU, 1024); - auto batch1 = std::make_unique(1, std::move(data1)); - auto data2 = std::make_unique(memory::Tier::GPU, 1024); - auto batch2 = std::make_unique(2, std::move(data2)); - - repository.add_data_batch(std::move(batch1)); - repository.add_data_batch(std::move(batch2)); - - auto p1 = repository.pop_next_data_batch(); - REQUIRE(p1 != nullptr); - REQUIRE(p1->get_batch_id() == 1); - - auto p2 = repository.pop_next_data_batch(); - REQUIRE(p2 != nullptr); - REQUIRE(p2->get_batch_id() == 2); - - REQUIRE(repository.pop_next_data_batch() == nullptr); -} diff --git a/test/data/test_data_repository_manager.cpp b/test/data/test_data_repository_manager.cpp index a42bbe7..33f61b0 100644 --- a/test/data/test_data_repository_manager.cpp +++ b/test/data/test_data_repository_manager.cpp @@ -125,7 +125,7 @@ TEST_CASE("shared_data_repository_manager Add Data Batch Single Operator", // Create and add batch auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = data_batch::make(batch_id, std::move(data)); std::vector> operator_ports = {{operator_id, "default"}}; manager.add_data_batch(batch, operator_ports); @@ -152,7 +152,7 @@ TEST_CASE("shared_data_repository_manager Add Data Batch Multiple Operators", // Create and add batch to all operators auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = data_batch::make(batch_id, std::move(data)); std::vector> operator_ports; for (size_t id : operator_ids) { @@ -229,7 +229,7 @@ TEST_CASE("shared_data_repository_manager Thread-Safe Add Batch", "[data_reposit for (int j = 0; j < batches_per_thread; ++j) { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = data_batch::make(batch_id, std::move(data)); manager.add_data_batch(batch, operator_ports); } }); @@ -251,111 +251,6 @@ TEST_CASE("shared_data_repository_manager Thread-Safe Add Batch", "[data_reposit REQUIRE(count == num_threads * batches_per_thread); } -// ============================================================================= -// Tests for unique_ptr based repository manager -// ============================================================================= - -// Test basic construction -TEST_CASE("unique_data_repository_manager Construction", "[data_repository_manager]") -{ - unique_data_repository_manager manager; - - // Manager should be empty initially - // Accessing non-existent repository should throw - REQUIRE_THROWS_AS(manager.get_repository(0, "default"), std::out_of_range); -} - -// Test adding a single repository with unique_ptr -TEST_CASE("unique_data_repository_manager Add Single Repository", "[data_repository_manager]") -{ - unique_data_repository_manager manager; - - size_t operator_id = 1; - auto repository = std::make_unique(); - manager.add_new_repository(operator_id, "default", std::move(repository)); - - // Repository should be accessible - auto& repo = manager.get_repository(operator_id, "default"); - REQUIRE(repo != nullptr); -} - -// Test adding data batch with unique_ptr (single operator only) -TEST_CASE("unique_data_repository_manager Add Data Batch Single Operator", - "[data_repository_manager]") -{ - unique_data_repository_manager manager; - - // Add repository - size_t operator_id = 1; - manager.add_new_repository(operator_id, "default", std::make_unique()); - - // Create and add batch - auto data = std::make_unique(memory::Tier::GPU, 1024); - uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_unique(batch_id, std::move(data)); - - std::vector> operator_ports = {{operator_id, "default"}}; - manager.add_data_batch(std::move(batch), operator_ports); - - // Repository should have the batch - auto& repo = manager.get_repository(operator_id, "default"); - auto pulled_batch = repo->pop_next_data_batch(); - REQUIRE(pulled_batch != nullptr); - REQUIRE(pulled_batch->get_batch_id() == batch_id); -} - -// Test that unique_ptr throws when adding to multiple operators -TEST_CASE("unique_data_repository_manager Add Batch Multiple Operators Throws", - "[data_repository_manager]") -{ - unique_data_repository_manager manager; - - // Add multiple repositories - manager.add_new_repository(1, "default", std::make_unique()); - manager.add_new_repository(2, "default", std::make_unique()); - - // Create batch - auto data = std::make_unique(memory::Tier::GPU, 1024); - uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_unique(batch_id, std::move(data)); - - // Trying to add to multiple operators should throw - std::vector> operator_ports = {{1, "default"}, - {2, "default"}}; - REQUIRE_THROWS_AS(manager.add_data_batch(std::move(batch), operator_ports), std::runtime_error); -} - -// Test adding multiple batches with unique_ptr -TEST_CASE("unique_data_repository_manager Add Multiple Batches", "[data_repository_manager]") -{ - unique_data_repository_manager manager; - - // Add repository - size_t operator_id = 1; - manager.add_new_repository(operator_id, "default", std::make_unique()); - - constexpr int num_batches = 10; - std::vector> operator_ports = {{operator_id, "default"}}; - - // Add multiple batches - for (int i = 0; i < num_batches; ++i) { - auto data = std::make_unique(memory::Tier::GPU, 1024); - uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_unique(batch_id, std::move(data)); - manager.add_data_batch(std::move(batch), operator_ports); - } - - // Repository should have all batches - auto& repo = manager.get_repository(operator_id, "default"); - int count = 0; - while (true) { - auto batch = repo->pop_next_data_batch(); - if (!batch) break; - ++count; - } - REQUIRE(count == num_batches); -} - // ============================================================================= // Integration Tests // ============================================================================= @@ -377,7 +272,7 @@ TEST_CASE("shared_data_repository_manager Full Workflow", "[data_repository_mana { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = data_batch::make(batch_id, std::move(data)); std::vector> all_ports; for (size_t id : operator_ids) { all_ports.push_back({id, "default"}); @@ -389,7 +284,7 @@ TEST_CASE("shared_data_repository_manager Full Workflow", "[data_repository_mana { auto data = std::make_unique(memory::Tier::GPU, 2048); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = data_batch::make(batch_id, std::move(data)); std::vector> p0 = {{0, "default"}}; manager.add_data_batch(batch, p0); } @@ -398,7 +293,7 @@ TEST_CASE("shared_data_repository_manager Full Workflow", "[data_repository_mana { auto data = std::make_unique(memory::Tier::GPU, 4096); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = data_batch::make(batch_id, std::move(data)); std::vector> p12 = {{1, "default"}, {2, "default"}}; manager.add_data_batch(batch, p12); } @@ -475,7 +370,7 @@ TEST_CASE("shared_data_repository_manager Large Number of Batches", "[data_repos for (int i = 0; i < num_batches; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = data_batch::make(batch_id, std::move(data)); manager.add_data_batch(batch, operator_ports); } @@ -539,7 +434,7 @@ TEST_CASE("shared_data_repository_manager Thread-Safe Mixed Operations", std::vector threads; std::atomic batch_count{0}; std::mutex pull_mutex; - std::vector> all_batches; + std::vector> all_batches; // Launch threads doing mixed operations for (int i = 0; i < num_threads; ++i) { @@ -551,7 +446,7 @@ TEST_CASE("shared_data_repository_manager Thread-Safe Mixed Operations", // Add batch to random operator size_t operator_id = (i + j) % 5; auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = data_batch::make(batch_id, std::move(data)); std::vector> operator_ports = { {operator_id, "default"}}; manager.add_data_batch(batch, operator_ports); @@ -613,7 +508,7 @@ TEST_CASE("shared_data_repository_manager Concurrent Add and Pull", "[data_repos // Add batch to one or more operators size_t operator_id = (i + j) % num_operators; auto data = std::make_unique(memory::Tier::GPU, 1024); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = data_batch::make(batch_id, std::move(data)); std::vector> operator_ports = { {operator_id, "default"}}; manager.add_data_batch(batch, operator_ports); @@ -705,7 +600,7 @@ TEST_CASE("shared_data_repository_manager High Contention Add Pull", "[data_repo // Add a batch uint64_t batch_id = manager.get_next_data_batch_id(); auto data = std::make_unique(memory::Tier::GPU, 512); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = data_batch::make(batch_id, std::move(data)); manager.add_data_batch(batch, operator_ports); ++total_added; @@ -760,7 +655,7 @@ TEST_CASE("shared_data_repository_manager Concurrent Add Multiple Operators Per for (int i = 0; i < num_batches; ++i) { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = data_batch::make(batch_id, std::move(data)); manager.add_data_batch(batch, all_operator_ports); } @@ -797,229 +692,6 @@ TEST_CASE("shared_data_repository_manager Concurrent Add Multiple Operators Per } } -// ============================================================================= -// unique_ptr Manager Thread-Safety Tests -// ============================================================================= - -// Test concurrent batch ID generation with unique_ptr manager -TEST_CASE("unique_data_repository_manager Thread-Safe Batch ID Generation", - "[data_repository_manager]") -{ - unique_data_repository_manager manager; - - constexpr int num_threads = 10; - constexpr int ids_per_thread = 100; - - std::vector threads; - std::vector> thread_ids(num_threads); - - // Launch threads to generate IDs - for (int i = 0; i < num_threads; ++i) { - threads.emplace_back([&, i]() { - for (int j = 0; j < ids_per_thread; ++j) { - thread_ids[i].push_back(manager.get_next_data_batch_id()); - } - }); - } - - // Wait for all threads - for (auto& thread : threads) { - thread.join(); - } - - // Collect all IDs - std::vector all_ids; - for (const auto& ids : thread_ids) { - all_ids.insert(all_ids.end(), ids.begin(), ids.end()); - } - - // All IDs should be unique - std::sort(all_ids.begin(), all_ids.end()); - auto last = std::unique(all_ids.begin(), all_ids.end()); - REQUIRE(last == all_ids.end()); - REQUIRE(all_ids.size() == num_threads * ids_per_thread); -} - -// Test concurrent batch addition with unique_ptr -TEST_CASE("unique_data_repository_manager Thread-Safe Add Batch", "[data_repository_manager]") -{ - unique_data_repository_manager manager; - - // Add repository - size_t operator_id = 1; - manager.add_new_repository(operator_id, "default", std::make_unique()); - - constexpr int num_threads = 10; - constexpr int batches_per_thread = 50; - - std::vector threads; - std::vector> operator_ports = {{operator_id, "default"}}; - - // Launch threads to add batches - for (int i = 0; i < num_threads; ++i) { - threads.emplace_back([&]() { - for (int j = 0; j < batches_per_thread; ++j) { - auto data = std::make_unique(memory::Tier::GPU, 1024); - uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_unique(batch_id, std::move(data)); - manager.add_data_batch(std::move(batch), operator_ports); - } - }); - } - - // Wait for all threads - for (auto& thread : threads) { - thread.join(); - } - - // Repository should have all batches - auto& repo = manager.get_repository(operator_id, "default"); - int count = 0; - while (true) { - auto batch = repo->pop_next_data_batch(); - if (!batch) break; - ++count; - } - REQUIRE(count == num_threads * batches_per_thread); -} - -// Test concurrent add and pull with unique_ptr -TEST_CASE("unique_data_repository_manager Concurrent Add and Pull", "[data_repository_manager]") -{ - unique_data_repository_manager manager; - - // Add repository - size_t operator_id = 0; - manager.add_new_repository(operator_id, "default", std::make_unique()); - - constexpr int num_adder_threads = 5; - constexpr int num_puller_threads = 5; - constexpr int batches_per_adder = 100; - - std::vector threads; - std::atomic batches_added{0}; - std::atomic batches_pulled{0}; - std::atomic keep_adding{true}; - - std::vector> operator_ports = {{operator_id, "default"}}; - - // Launch adder threads - for (int i = 0; i < num_adder_threads; ++i) { - threads.emplace_back([&]() { - for (int j = 0; j < batches_per_adder; ++j) { - auto data = std::make_unique(memory::Tier::GPU, 1024); - uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_unique(batch_id, std::move(data)); - manager.add_data_batch(std::move(batch), operator_ports); - - ++batches_added; - - // Small delay to allow pullers to work - std::this_thread::sleep_for(std::chrono::microseconds(100)); - } - }); - } - - // Launch puller threads - for (int i = 0; i < num_puller_threads; ++i) { - threads.emplace_back([&]() { - auto& repo = manager.get_repository(operator_id, "default"); - - while (keep_adding.load()) { - auto batch = repo->pop_next_data_batch(); - if (batch) { - ++batches_pulled; - } else { - std::this_thread::yield(); - } - } - - // Final cleanup - while (true) { - auto batch = repo->pop_next_data_batch(); - if (!batch) break; - ++batches_pulled; - } - }); - } - - // Wait for adder threads to complete - for (int i = 0; i < num_adder_threads; ++i) { - threads[i].join(); - } - - // Signal pullers that adding is done - keep_adding.store(false); - - // Wait for puller threads to complete - for (size_t i = num_adder_threads; i < threads.size(); ++i) { - threads[i].join(); - } - - // Verify all batches were added - REQUIRE(batches_added == num_adder_threads * batches_per_adder); - - // Verify all batches were pulled - REQUIRE(batches_pulled == num_adder_threads * batches_per_adder); -} - -// Test high contention with unique_ptr -TEST_CASE("unique_data_repository_manager High Contention", "[data_repository_manager]") -{ - unique_data_repository_manager manager; - - size_t operator_id = 0; - manager.add_new_repository(operator_id, "default", std::make_unique()); - - constexpr int num_threads = 20; - constexpr int operations_per_thread = 50; - - std::vector threads; - std::atomic total_added{0}; - std::atomic total_pulled{0}; - - std::vector> operator_ports = {{operator_id, "default"}}; - - // Launch threads doing both add and pull operations - for (int i = 0; i < num_threads; ++i) { - threads.emplace_back([&]() { - auto& repo = manager.get_repository(operator_id, "default"); - - for (int j = 0; j < operations_per_thread; ++j) { - // Add a batch - auto data = std::make_unique(memory::Tier::GPU, 512); - uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_unique(batch_id, std::move(data)); - manager.add_data_batch(std::move(batch), operator_ports); - ++total_added; - - // Immediately try to pull a batch - auto pulled = repo->pop_next_data_batch(); - if (pulled) { ++total_pulled; } - } - }); - } - - // Wait for all threads - for (auto& thread : threads) { - thread.join(); - } - - // Verify counts - REQUIRE(total_added == num_threads * operations_per_thread); - - // Clean up remaining batches - auto& repo = manager.get_repository(operator_id, "default"); - while (true) { - auto batch = repo->pop_next_data_batch(); - if (!batch) break; - ++total_pulled; - } - - // All batches should have been processed - REQUIRE(total_pulled == total_added); -} - // ============================================================================= // Edge Case Tests // ============================================================================= @@ -1072,7 +744,7 @@ TEST_CASE("shared_data_repository_manager Batches With Different Sizes", for (size_t size : sizes) { auto data = std::make_unique(memory::Tier::GPU, size); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = data_batch::make(batch_id, std::move(data)); manager.add_data_batch(batch, operator_ports); } @@ -1104,7 +776,7 @@ TEST_CASE("shared_data_repository_manager Batches With Different Tiers", for (memory::Tier tier : tiers) { auto data = std::make_unique(tier, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = data_batch::make(batch_id, std::move(data)); manager.add_data_batch(batch, operator_ports); } @@ -1135,7 +807,7 @@ TEST_CASE("shared_data_repository_manager Rapid Add Pull Cycles", "[data_reposit for (int cycle = 0; cycle < 100; ++cycle) { auto data = std::make_unique(memory::Tier::GPU, 1024); uint64_t batch_id = manager.get_next_data_batch_id(); - auto batch = std::make_shared(batch_id, std::move(data)); + auto batch = data_batch::make(batch_id, std::move(data)); manager.add_data_batch(batch, operator_ports); // Pull the batch From 1566c283d36c9dda1312e357834da819e66d9df8 Mon Sep 17 00:00:00 2001 From: Dhruv Vats Date: Wed, 13 May 2026 16:42:44 +0530 Subject: [PATCH 6/7] move clone and clone_to to data_batch_core --- include/cucascade/data/data_batch.hpp | 111 ++++++++++++++------------ src/data/data_batch.cpp | 18 ++++- test/data/test_data_batch.cpp | 54 ++++++------- 3 files changed, 101 insertions(+), 82 deletions(-) diff --git a/include/cucascade/data/data_batch.hpp b/include/cucascade/data/data_batch.hpp index f2ae38f..c582a70 100644 --- a/include/cucascade/data/data_batch.hpp +++ b/include/cucascade/data/data_batch.hpp @@ -62,8 +62,7 @@ class data_batch; * State transitions and synchronization live on data_batch. This core object only * exposes data/tier/memory methods to friend accessor classes. * - * @note Non-copyable and non-movable. The object itself never moves; only the - * smart pointer to it is transferred between states. + * @note Non-copyable and non-movable. The object itself never moves. */ class data_batch_core { friend data_batch; @@ -86,7 +85,7 @@ class data_batch_core { * * @return The batch ID (immutable after construction). */ - uint64_t get_batch_id() const; + [[nodiscard]] uint64_t get_batch_id() const; // Only friend accessor classes can call these methods. @@ -94,19 +93,19 @@ class data_batch_core { * @brief Get the memory tier of the held data. * @return The current memory tier. */ - memory::Tier get_current_tier() const; + [[nodiscard]] memory::Tier get_current_tier() const; /** * @brief Get a raw pointer to the data representation. * @return Non-owning pointer to the data, or nullptr if empty. */ - idata_representation* get_data() const; + [[nodiscard]] const idata_representation* get_data() const; /** * @brief Get a raw pointer to the memory space. * @return Non-owning pointer to the memory space, or nullptr if data is null. */ - memory::memory_space* get_memory_space() const; + [[nodiscard]] const memory::memory_space* get_memory_space() const; /** * @brief Replace the data representation. @@ -148,6 +147,40 @@ class data_batch_core { } } + /** + * @brief Create an independent deep copy with representation conversion. + * + * The clone has a new batch ID and its data is converted to TargetRepresentation + * using the provided converter registry. + * + * @tparam TargetRepresentation Target representation type. + * @param registry Converter registry for type-keyed dispatch. + * @param new_batch_id Batch ID for the cloned batch. + * @param target_memory_space Target memory space for the converted data. + * @param stream CUDA stream for memory operations. + * @return A new data_batch wrapped in shared_ptr. + */ + template + [[nodiscard]] std::shared_ptr clone_to( + representation_converter_registry& registry, + uint64_t new_batch_id, + const memory::memory_space* target_memory_space, + rmm::cuda_stream_view stream) const; + + /** + * @brief Create an independent deep copy of the batch data. + * + * The clone has a new batch ID and its own copy of the data representation, + * residing in the same memory space as the original. + * + * @param new_batch_id Batch ID for the cloned batch. + * @param stream CUDA stream for memory operations. + * @return A new data_batch wrapped in shared_ptr. + * @throws std::runtime_error if the data is null. + */ + [[nodiscard]] std::shared_ptr clone(uint64_t new_batch_id, + rmm::cuda_stream_view stream) const; + private: data_batch_core(uint64_t batch_id, std::unique_ptr data); @@ -164,13 +197,17 @@ class data_batch : public std::enable_shared_from_this { friend mutable_data_batch; public: + /** + * @brief Factory function to create new data_batches. + */ static std::shared_ptr make(uint64_t batch_id, std::unique_ptr data); /** * @brief Get the unique batch identifier. * - * Lock-free -- safe to call without acquiring an accessor. + * Lock-free -- safe to call without acquiring an accessor + * since batch_id is immutable after construction. * * @return The batch ID (immutable after construction). */ @@ -256,53 +293,8 @@ class data_batch : public std::enable_shared_from_this { */ size_t get_read_only_count() const { return _read_only_count.load(std::memory_order_acquire); } - /** - * @brief Create an independent deep copy with representation conversion. - * - * The clone has a new batch ID and its data is converted to TargetRepresentation - * using the provided converter registry. - * - * @tparam TargetRepresentation Target representation type. - * @param registry Converter registry for type-keyed dispatch. - * @param new_batch_id Batch ID for the cloned batch. - * @param target_memory_space Target memory space for the converted data. - * @param stream CUDA stream for memory operations. - * @return A new data_batch wrapped in shared_ptr. - */ - template - std::shared_ptr clone_to(representation_converter_registry& registry, - uint64_t new_batch_id, - const memory::memory_space* target_memory_space, - rmm::cuda_stream_view stream) const - { - auto new_representation = - registry.convert(*(_batch._data), target_memory_space, stream); - return make(new_batch_id, std::move(new_representation)); - } - - /** - * @brief Create an independent deep copy of the batch data. - * - * The clone has a new batch ID and its own copy of the data representation, - * residing in the same memory space as the original. - * - * @param new_batch_id Batch ID for the cloned batch. - * @param stream CUDA stream for memory operations. - * @return A new data_batch wrapped in shared_ptr. - * @throws std::runtime_error if the data is null. - */ - [[nodiscard]] std::shared_ptr clone(uint64_t new_batch_id, - rmm::cuda_stream_view stream) const - { - auto cloned_data = _batch._data->clone(stream); - return make(new_batch_id, std::move(cloned_data)); - } - private: - data_batch(uint64_t batch_id, std::unique_ptr data) - : _batch(data_batch_core(batch_id, std::move(data))) - { - } + data_batch(uint64_t batch_id, std::unique_ptr data); data_batch_core _batch; mutable std::shared_mutex _rw_mutex; @@ -312,6 +304,19 @@ class data_batch : public std::enable_shared_from_this { std::atomic _read_only_count{0}; ///< Count of active read_only_data_batch instances }; +// Defined after data_batch's concrete definition to use data_batch::make +template +[[nodiscard]] std::shared_ptr data_batch_core::clone_to( + representation_converter_registry& registry, + uint64_t new_batch_id, + const memory::memory_space* target_memory_space, + rmm::cuda_stream_view stream) const +{ + auto new_representation = + registry.convert(*_data, target_memory_space, stream); + return data_batch::make(new_batch_id, std::move(new_representation)); +} + /** * @brief RAII read-only accessor for data_batch. * diff --git a/src/data/data_batch.cpp b/src/data/data_batch.cpp index da41c09..50c58de 100644 --- a/src/data/data_batch.cpp +++ b/src/data/data_batch.cpp @@ -17,6 +17,8 @@ #include +#include + namespace cucascade { // ========== data_batch_core ========== @@ -25,13 +27,20 @@ uint64_t data_batch_core::get_batch_id() const { return _batch_id; } memory::Tier data_batch_core::get_current_tier() const { return _data->get_current_tier(); } -idata_representation* data_batch_core::get_data() const { return _data.get(); } +const idata_representation* data_batch_core::get_data() const { return _data.get(); } -memory::memory_space* data_batch_core::get_memory_space() const +const memory::memory_space* data_batch_core::get_memory_space() const { return &(_data->get_memory_space()); } +[[nodiscard]] std::shared_ptr data_batch_core::clone(uint64_t new_batch_id, + rmm::cuda_stream_view stream) const +{ + auto cloned_data = _data->clone(stream); + return data_batch::make(new_batch_id, std::move(cloned_data)); +} + data_batch_core::data_batch_core(uint64_t batch_id, std::unique_ptr data) : _batch_id(batch_id), _data(std::move(data)) { @@ -92,6 +101,11 @@ void data_batch::unsubscribe() } } +data_batch::data_batch(uint64_t batch_id, std::unique_ptr data) + : _batch(batch_id, std::move(data)) +{ +} + // ========== read_only_data_batch ========== read_only_data_batch::~read_only_data_batch() diff --git a/test/data/test_data_batch.cpp b/test/data/test_data_batch.cpp index f588a35..5dc337d 100644 --- a/test/data/test_data_batch.cpp +++ b/test/data/test_data_batch.cpp @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -112,6 +111,7 @@ TEST_CASE("data_batch get_read_only acquires shared access", "[data_batch]") auto batch = data_batch::make(1, std::move(data)); auto ro = batch->get_read_only(); + REQUIRE(ro->get_batch_id() == 1); REQUIRE(ro->get_current_tier() == memory::Tier::GPU); } @@ -491,7 +491,7 @@ TEST_CASE("data_batch clone creates independent copy", "[data_batch]") auto batch = data_batch::make(42, std::move(data)); auto ro = batch->get_read_only(); - auto cloned = batch->clone(100, rmm::cuda_stream_view{}); + auto cloned = ro->clone(100, rmm::cuda_stream_view{}); REQUIRE(cloned != nullptr); REQUIRE(cloned->get_batch_id() == 100); @@ -510,13 +510,13 @@ TEST_CASE("data_batch clone with different batch IDs", "[data_batch]") auto ro = batch->get_read_only(); - auto clone1 = batch->clone(1, rmm::cuda_stream_view{}); + auto clone1 = ro->clone(1, rmm::cuda_stream_view{}); REQUIRE(clone1->get_batch_id() == 1); - auto clone2 = batch->clone(0, rmm::cuda_stream_view{}); + auto clone2 = ro->clone(0, rmm::cuda_stream_view{}); REQUIRE(clone2->get_batch_id() == 0); - auto clone3 = batch->clone(UINT64_MAX, rmm::cuda_stream_view{}); + auto clone3 = ro->clone(UINT64_MAX, rmm::cuda_stream_view{}); REQUIRE(clone3->get_batch_id() == UINT64_MAX); } @@ -527,7 +527,7 @@ TEST_CASE("data_batch clone preserves tier information", "[data_batch]") auto data = std::make_unique(memory::Tier::GPU, 1024); auto batch = data_batch::make(1, std::move(data)); auto ro = batch->get_read_only(); - auto cloned = batch->clone(2, rmm::cuda_stream_view{}); + auto cloned = ro->clone(2, rmm::cuda_stream_view{}); auto ro_cl = cloned->get_read_only(); REQUIRE(ro_cl->get_current_tier() == memory::Tier::GPU); } @@ -536,7 +536,7 @@ TEST_CASE("data_batch clone preserves tier information", "[data_batch]") auto data = std::make_unique(memory::Tier::HOST, 1024); auto batch = data_batch::make(1, std::move(data)); auto ro = batch->get_read_only(); - auto cloned = batch->clone(2, rmm::cuda_stream_view{}); + auto cloned = ro->clone(2, rmm::cuda_stream_view{}); auto ro_cl = cloned->get_read_only(); REQUIRE(ro_cl->get_current_tier() == memory::Tier::HOST); } @@ -545,7 +545,7 @@ TEST_CASE("data_batch clone preserves tier information", "[data_batch]") auto data = std::make_unique(memory::Tier::DISK, 1024); auto batch = data_batch::make(1, std::move(data)); auto ro = batch->get_read_only(); - auto cloned = batch->clone(2, rmm::cuda_stream_view{}); + auto cloned = ro->clone(2, rmm::cuda_stream_view{}); auto ro_cl = cloned->get_read_only(); REQUIRE(ro_cl->get_current_tier() == memory::Tier::DISK); } @@ -569,14 +569,14 @@ TEST_CASE("data_batch clone with real GPU data verifies data integrity", "[data_ auto batch = data_batch::make(1, std::move(gpu_repr)); auto ro = batch->get_read_only(); - auto cloned = batch->clone(2, stream.view()); + auto cloned = ro->clone(2, stream.view()); REQUIRE(cloned != nullptr); REQUIRE(cloned->get_batch_id() == 2); auto ro_clone = cloned->get_read_only(); - auto* original_repr = dynamic_cast(ro->get_data()); - auto* cloned_repr = dynamic_cast(ro_clone->get_data()); + const auto* original_repr = dynamic_cast(ro->get_data()); + const auto* cloned_repr = dynamic_cast(ro_clone->get_data()); REQUIRE(original_repr != nullptr); REQUIRE(cloned_repr != nullptr); @@ -600,12 +600,12 @@ TEST_CASE("data_batch clone creates independent memory copies", "[data_batch][gp auto batch = data_batch::make(1, std::move(gpu_repr)); auto ro = batch->get_read_only(); - auto cloned = batch->clone(2, stream.view()); + auto cloned = ro->clone(2, stream.view()); auto ro_clone = cloned->get_read_only(); - auto* original_repr = dynamic_cast(ro->get_data()); - auto* cloned_repr = dynamic_cast(ro_clone->get_data()); + const auto* original_repr = dynamic_cast(ro->get_data()); + const auto* cloned_repr = dynamic_cast(ro_clone->get_data()); // Verify each column points to different memory for (cudf::size_type i = 0; i < original_repr->get_table_view().num_columns(); ++i) { @@ -626,9 +626,9 @@ TEST_CASE("data_batch multiple clones are all independent", "[data_batch][gpu]") // Clone 3 times from the same read_only accessor (clone does not consume the accessor) auto ro = batch->get_read_only(); - auto clone1 = batch->clone(10, stream.view()); - auto clone2 = batch->clone(20, stream.view()); - auto clone3 = batch->clone(30, stream.view()); + auto clone1 = ro->clone(10, stream.view()); + auto clone2 = ro->clone(20, stream.view()); + auto clone3 = ro->clone(30, stream.view()); REQUIRE(clone1->get_batch_id() == 10); REQUIRE(clone2->get_batch_id() == 20); @@ -638,10 +638,10 @@ TEST_CASE("data_batch multiple clones are all independent", "[data_batch][gpu]") auto ro_c2 = clone2->get_read_only(); auto ro_c3 = clone3->get_read_only(); - auto* original_repr = dynamic_cast(ro->get_data()); - auto* clone1_repr = dynamic_cast(ro_c1->get_data()); - auto* clone2_repr = dynamic_cast(ro_c2->get_data()); - auto* clone3_repr = dynamic_cast(ro_c3->get_data()); + const auto* original_repr = dynamic_cast(ro->get_data()); + const auto* clone1_repr = dynamic_cast(ro_c1->get_data()); + const auto* clone2_repr = dynamic_cast(ro_c2->get_data()); + const auto* clone3_repr = dynamic_cast(ro_c3->get_data()); stream.synchronize(); expect_cudf_tables_equal_on_stream( @@ -663,11 +663,11 @@ TEST_CASE("data_batch clone with empty table", "[data_batch][gpu]") auto batch = data_batch::make(1, std::move(gpu_repr)); auto ro = batch->get_read_only(); - auto cloned = batch->clone(2, stream.view()); + auto cloned = ro->clone(2, stream.view()); REQUIRE(cloned != nullptr); - auto ro_clone = cloned->get_read_only(); - auto* cloned_repr = dynamic_cast(ro_clone->get_data()); + auto ro_clone = cloned->get_read_only(); + const auto* cloned_repr = dynamic_cast(ro_clone->get_data()); REQUIRE(cloned_repr != nullptr); REQUIRE(cloned_repr->get_table_view().num_rows() == 0); REQUIRE(cloned_repr->get_table_view().num_columns() == 2); @@ -685,13 +685,13 @@ TEST_CASE("data_batch clone with large table", "[data_batch][gpu]") auto batch = data_batch::make(1, std::move(gpu_repr)); auto ro = batch->get_read_only(); - auto cloned = batch->clone(2, stream.view()); + auto cloned = ro->clone(2, stream.view()); REQUIRE(cloned != nullptr); auto ro_clone = cloned->get_read_only(); - auto* original_repr = dynamic_cast(ro->get_data()); - auto* cloned_repr = dynamic_cast(ro_clone->get_data()); + const auto* original_repr = dynamic_cast(ro->get_data()); + const auto* cloned_repr = dynamic_cast(ro_clone->get_data()); // Verify structure REQUIRE(cloned_repr->get_table_view().num_rows() == 10000); From b6370b61b4957e13401f9dbdf53ad461e4d36bdc Mon Sep 17 00:00:00 2001 From: Dhruv Vats Date: Wed, 13 May 2026 17:15:54 +0530 Subject: [PATCH 7/7] dont make the get_data and get_memory_space return const ptrs yet --- include/cucascade/data/data_batch.hpp | 6 +++--- src/data/data_batch.cpp | 4 ++-- test/data/test_data_batch.cpp | 24 ++++++++++++------------ 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/include/cucascade/data/data_batch.hpp b/include/cucascade/data/data_batch.hpp index c582a70..e2025c3 100644 --- a/include/cucascade/data/data_batch.hpp +++ b/include/cucascade/data/data_batch.hpp @@ -99,13 +99,13 @@ class data_batch_core { * @brief Get a raw pointer to the data representation. * @return Non-owning pointer to the data, or nullptr if empty. */ - [[nodiscard]] const idata_representation* get_data() const; + [[nodiscard]] idata_representation* get_data() const; /** * @brief Get a raw pointer to the memory space. * @return Non-owning pointer to the memory space, or nullptr if data is null. */ - [[nodiscard]] const memory::memory_space* get_memory_space() const; + [[nodiscard]] memory::memory_space* get_memory_space() const; /** * @brief Replace the data representation. @@ -141,7 +141,7 @@ class data_batch_core { if (needs_sync) { // Conversions involving GPU may enqueue async operations on the provided - // stream that read from the source memory. Synchronize before the old + // stream that read from the source memory. Synchronize before the old // representation is destroyed to avoid use-after-free. stream.synchronize(); } diff --git a/src/data/data_batch.cpp b/src/data/data_batch.cpp index 50c58de..b80b7b8 100644 --- a/src/data/data_batch.cpp +++ b/src/data/data_batch.cpp @@ -27,9 +27,9 @@ uint64_t data_batch_core::get_batch_id() const { return _batch_id; } memory::Tier data_batch_core::get_current_tier() const { return _data->get_current_tier(); } -const idata_representation* data_batch_core::get_data() const { return _data.get(); } +idata_representation* data_batch_core::get_data() const { return _data.get(); } -const memory::memory_space* data_batch_core::get_memory_space() const +memory::memory_space* data_batch_core::get_memory_space() const { return &(_data->get_memory_space()); } diff --git a/test/data/test_data_batch.cpp b/test/data/test_data_batch.cpp index 5dc337d..33221a4 100644 --- a/test/data/test_data_batch.cpp +++ b/test/data/test_data_batch.cpp @@ -575,8 +575,8 @@ TEST_CASE("data_batch clone with real GPU data verifies data integrity", "[data_ auto ro_clone = cloned->get_read_only(); - const auto* original_repr = dynamic_cast(ro->get_data()); - const auto* cloned_repr = dynamic_cast(ro_clone->get_data()); + auto* original_repr = dynamic_cast(ro->get_data()); + auto* cloned_repr = dynamic_cast(ro_clone->get_data()); REQUIRE(original_repr != nullptr); REQUIRE(cloned_repr != nullptr); @@ -604,8 +604,8 @@ TEST_CASE("data_batch clone creates independent memory copies", "[data_batch][gp auto ro_clone = cloned->get_read_only(); - const auto* original_repr = dynamic_cast(ro->get_data()); - const auto* cloned_repr = dynamic_cast(ro_clone->get_data()); + auto* original_repr = dynamic_cast(ro->get_data()); + auto* cloned_repr = dynamic_cast(ro_clone->get_data()); // Verify each column points to different memory for (cudf::size_type i = 0; i < original_repr->get_table_view().num_columns(); ++i) { @@ -638,10 +638,10 @@ TEST_CASE("data_batch multiple clones are all independent", "[data_batch][gpu]") auto ro_c2 = clone2->get_read_only(); auto ro_c3 = clone3->get_read_only(); - const auto* original_repr = dynamic_cast(ro->get_data()); - const auto* clone1_repr = dynamic_cast(ro_c1->get_data()); - const auto* clone2_repr = dynamic_cast(ro_c2->get_data()); - const auto* clone3_repr = dynamic_cast(ro_c3->get_data()); + auto* original_repr = dynamic_cast(ro->get_data()); + auto* clone1_repr = dynamic_cast(ro_c1->get_data()); + auto* clone2_repr = dynamic_cast(ro_c2->get_data()); + auto* clone3_repr = dynamic_cast(ro_c3->get_data()); stream.synchronize(); expect_cudf_tables_equal_on_stream( @@ -666,8 +666,8 @@ TEST_CASE("data_batch clone with empty table", "[data_batch][gpu]") auto cloned = ro->clone(2, stream.view()); REQUIRE(cloned != nullptr); - auto ro_clone = cloned->get_read_only(); - const auto* cloned_repr = dynamic_cast(ro_clone->get_data()); + auto ro_clone = cloned->get_read_only(); + auto* cloned_repr = dynamic_cast(ro_clone->get_data()); REQUIRE(cloned_repr != nullptr); REQUIRE(cloned_repr->get_table_view().num_rows() == 0); REQUIRE(cloned_repr->get_table_view().num_columns() == 2); @@ -690,8 +690,8 @@ TEST_CASE("data_batch clone with large table", "[data_batch][gpu]") auto ro_clone = cloned->get_read_only(); - const auto* original_repr = dynamic_cast(ro->get_data()); - const auto* cloned_repr = dynamic_cast(ro_clone->get_data()); + auto* original_repr = dynamic_cast(ro->get_data()); + auto* cloned_repr = dynamic_cast(ro_clone->get_data()); // Verify structure REQUIRE(cloned_repr->get_table_view().num_rows() == 10000);