diff --git a/include/cuco/detail/dynamic_map.inl b/include/cuco/detail/dynamic_map.inl index 05b13cc0b..d8f0f11c3 100644 --- a/include/cuco/detail/dynamic_map.inl +++ b/include/cuco/detail/dynamic_map.inl @@ -79,6 +79,7 @@ void dynamic_map::reserve(std::size_t n, cudaStrea { int64_t num_elements_remaining = n; uint32_t submap_idx = 0; + while (num_elements_remaining > 0) { std::size_t submap_capacity; @@ -130,7 +131,6 @@ void dynamic_map::insert( auto constexpr tile_size = 4; std::size_t num_to_insert = std::distance(first, last); - reserve(size_ + num_to_insert, stream); uint32_t submap_idx = 0; diff --git a/include/cuco/detail/dynamic_map/dynamic_map.inl b/include/cuco/detail/dynamic_map/dynamic_map.inl index 4d689bbba..7044f137d 100644 --- a/include/cuco/detail/dynamic_map/dynamic_map.inl +++ b/include/cuco/detail/dynamic_map/dynamic_map.inl @@ -25,6 +25,7 @@ #include #include +#include namespace cuco { namespace experimental { @@ -67,6 +68,46 @@ constexpr dynamic_map +constexpr dynamic_map:: + dynamic_map(Extent initial_capacity, + empty_key empty_key_sentinel, + empty_value empty_value_sentinel, + erased_key erased_key_sentinel, + KeyEqual const& pred, + ProbingScheme const& probing_scheme, + cuda_thread_scope scope, + Storage storage, + Allocator const& alloc, + cuda::stream_ref stream) + : size_{0}, + capacity_{initial_capacity}, + min_insert_size_{static_cast(1E4)}, + max_load_factor_{0.60}, + alloc_{alloc} +{ + submaps_.push_back( + std::make_unique< + cuco::static_map>( + initial_capacity, + empty_key_sentinel, + empty_value_sentinel, + erased_key_sentinel, + pred, + probing_scheme, + scope, + storage, + alloc, + stream)); +} + template reserve(size_ + num_to_insert, stream); uint32_t submap_idx = 0; - while (num_to_insert > 0) { + while (num_to_insert > 0 && submap_idx < submaps_.size()) { auto& cur = submaps_[submap_idx]; auto capacity_remaining = max_load_factor_ * cur->capacity() - cur->size(); @@ -91,7 +133,6 @@ void dynamic_map= min_insert_size_) { auto const n = std::min(static_cast(capacity_remaining), num_to_insert); - std::size_t h_num_successes = cur->insert(first, first + n, stream); size_ += h_num_successes; @@ -126,21 +167,43 @@ void dynamic_map empty_key_sentinel{submaps_.front()->empty_key_sentinel()}; empty_value empty_value_sentinel{submaps_.front()->empty_value_sentinel()}; - + erased_key erased_key_sentinel{submaps_.front()->erased_key_sentinel()}; + if (erased_key_sentinel != empty_key_sentinel) { + submaps_.push_back(std::make_unique(submap_capacity, + empty_key_sentinel, + empty_value_sentinel, + erased_key_sentinel, + KeyEqual{}, + ProbingScheme{}, + cuda_thread_scope{}, + Storage{}, + alloc_, + stream)); + } else { + submaps_.push_back(std::make_unique(submap_capacity, + empty_key_sentinel, + empty_value_sentinel, + KeyEqual{}, + ProbingScheme{}, + cuda_thread_scope{}, + Storage{}, + alloc_, + stream)); + } submap_capacity = capacity_; - submaps_.push_back(std::make_unique(submap_capacity, - empty_key_sentinel, - empty_value_sentinel, - KeyEqual{}, - ProbingScheme{}, - cuda_thread_scope{}, - Storage{}, - alloc_, - stream)); capacity_ *= 2; } - num_elements_remaining -= max_load_factor_ * submap_capacity - min_insert_size_; + // for clamp to work + std::ptrdiff_t remaining = + static_cast(num_elements_remaining) - + static_cast(max_load_factor_ * submap_capacity - min_insert_size_); + + // size_type is unsigned long. can underflow so clamp it + num_elements_remaining = + static_cast(std::clamp(remaining, + static_cast(0), + static_cast(num_elements_remaining))); submap_idx++; } } @@ -176,5 +239,38 @@ void dynamic_map +template +void dynamic_map::erase( + InputIt first, InputIt last, cuda::stream_ref stream) +{ + auto num_keys = cuco::detail::distance(first, last); + std::size_t traversed = 0; + uint32_t submap_idx = 0; + while (num_keys > 0 && submap_idx < submaps_.size()) { + const auto& cur = submaps_[submap_idx]; + const size_t cur_size = cur->size(); + const size_t num_keys_to_process = + std::min(static_cast(cur_size), num_keys); + CUCO_CUDA_TRY(cudaStreamSynchronize(stream.get())); + + cur->erase(first, first + num_keys_to_process, stream); + + traversed += num_keys_to_process; + num_keys -= num_keys_to_process; + submap_idx++; + first += num_keys_to_process; + + size_ -= num_keys_to_process; + } +} + } // namespace experimental } // namespace cuco diff --git a/include/cuco/dynamic_map.cuh b/include/cuco/dynamic_map.cuh index 4520e87b9..d3eb3858e 100644 --- a/include/cuco/dynamic_map.cuh +++ b/include/cuco/dynamic_map.cuh @@ -112,6 +112,38 @@ class dynamic_map { Allocator const& alloc = {}, cuda::stream_ref stream = {}); + /** + * @brief Constructs a dynamically-sized map with erase capability. + * + * The capacity of the map will automatically increase as the user adds key/value pairs using + * `insert`. + * + * Capacity increases by a factor of growth_factor each time the size of the map exceeds a + * threshold occupancy. The performance of `find` and `contains` gradually decreases each time the + * map's capacity grows. + * + * @param initial_capacity The initial number of slots in the map + * @param empty_key_sentinel The reserved key value for empty slots + * @param empty_value_sentinel The reserved mapped value for empty slots + * @param erased_key_sentinel The reserved key value for empty slots + * @param pred Key equality binary predicate + * @param probing_scheme Probing scheme + * @param scope The scope in which operations will be performed + * @param storage Kind of storage to use + * @param alloc Allocator used for allocating device storage + * @param stream CUDA stream used to initialize the map + */ + constexpr dynamic_map(Extent initial_capacity, + empty_key empty_key_sentinel, + empty_value empty_value_sentinel, + erased_key erased_key_sentinel, + KeyEqual const& pred = {}, + ProbingScheme const& probing_scheme = {}, + cuda_thread_scope scope = {}, + Storage storage = {}, + Allocator const& alloc = {}, + cuda::stream_ref stream = {}); + /** * @brief Grows the capacity of the map so there is enough space for `n` key/value pairs. * @@ -157,6 +189,49 @@ class dynamic_map { OutputIt output_begin, cuda::stream_ref stream = {}) const; + /** + * @brief Erases keys in the range `[first, last)`. + * + * For each key `k` in `[first, last)`, if `contains(k) == true), removes `k` and it's + * associated value from the map. Else, no effect. + * + * Side-effects: + * - `contains(k) == false` + * - `find(k) == end()` + * - `insert({k,v}) == true` + * - `get_size()` is reduced by the total number of erased keys + * + * This function synchronizes `stream`. + * + * Keep in mind that `erase` does not cause the map to shrink its memory allocation. + * + * @tparam InputIt Device accessible input iterator whose `value_type` is + * convertible to the map's `value_type` + * + * @param first Beginning of the sequence of keys + * @param last End of the sequence of keys + * @param stream Stream used for executing the kernels + * + * @throw std::runtime_error if a unique erased key sentinel value was not + * provided at construction + */ + template + void erase(InputIt first, InputIt last, cuda::stream_ref stream = {}); + + /** + * @brief Gets the current number of elements in the map + * + * @return The current number of elements in the map + */ + size_type get_size() const noexcept { return size_; } + + /** + * @brief Gets the maximum number of elements the hash map can hold. + * + * @return The maximum number of elements the hash map can hold + */ + size_type get_capacity() const noexcept { return capacity_; } + private: size_type size_{}; ///< Number of keys in the map size_type capacity_{}; ///< Maximum number of keys that can be inserted diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index f2f882f01..681e4903b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -93,7 +93,8 @@ ConfigureTest(STATIC_MAP_TEST ConfigureTest(DYNAMIC_MAP_TEST dynamic_map/unique_sequence_test.cu dynamic_map/unique_sequence_test_experimental.cu - dynamic_map/erase_test.cu) + dynamic_map/erase_test.cu + dynamic_map/erase_test_experimental.cu) ################################################################################################### # - static_multiset tests ------------------------------------------------------------------------- diff --git a/tests/dynamic_map/erase_test_experimental.cu b/tests/dynamic_map/erase_test_experimental.cu new file mode 100644 index 000000000..2dab10aab --- /dev/null +++ b/tests/dynamic_map/erase_test_experimental.cu @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include +#include +#include +#include +#include + +#include + +#include + +TEMPLATE_TEST_CASE_SIG("erase key experimental", + "", + ((typename Key, typename Value), Key, Value), + (int32_t, int32_t), + (int32_t, int64_t), + (int64_t, int32_t), + (int64_t, int64_t)) +{ + constexpr std::size_t num_keys = 1'000'000; + cuco::experimental::dynamic_map map{num_keys * 2, + cuco::empty_key{-1}, + cuco::empty_value{-1}, + cuco::erased_key{-2}}; + + SECTION("Check single submap insert/erase experimental") + { + thrust::device_vector d_keys(num_keys); + thrust::device_vector d_values(num_keys); + thrust::device_vector d_keys_exist(num_keys); + + thrust::sequence(thrust::device, d_keys.begin(), d_keys.end(), 1); + thrust::sequence(thrust::device, d_values.begin(), d_values.end(), 1); + + auto pairs_begin = + thrust::make_zip_iterator(thrust::make_tuple(d_keys.begin(), d_values.begin())); + + map.insert(pairs_begin, pairs_begin + num_keys); + + REQUIRE(map.get_size() == num_keys); + + map.erase(d_keys.begin(), d_keys.end()); + + // delete decreases count correctly + REQUIRE(map.get_size() == 0); + + map.contains(d_keys.begin(), d_keys.end(), d_keys_exist.begin()); + + // keys were actaully deleted + REQUIRE(cuco::test::none_of(d_keys_exist.begin(), d_keys_exist.end(), thrust::identity{})); + + // ensures that map is reusing deleted slots + map.insert(pairs_begin, pairs_begin + num_keys); + + REQUIRE(map.get_size() == num_keys); + + map.contains(d_keys.begin(), d_keys.end(), d_keys_exist.begin()); + + REQUIRE(cuco::test::all_of(d_keys_exist.begin(), d_keys_exist.end(), thrust::identity{})); + + // erase can act selectively + map.erase(d_keys.begin(), d_keys.begin() + num_keys / 2); + map.contains(d_keys.begin(), d_keys.end(), d_keys_exist.begin()); + + REQUIRE(cuco::test::none_of( + d_keys_exist.begin(), d_keys_exist.begin() + num_keys / 2, thrust::identity{})); + + REQUIRE(cuco::test::all_of( + d_keys_exist.begin() + num_keys / 2, d_keys_exist.end(), thrust::identity{})); + + // clear map + map.erase(d_keys.begin() + num_keys / 2, d_keys.end()); + } + + SECTION("Check multiple submaps insert/erase experimental") + { + constexpr std::size_t num = 4 * num_keys; + + thrust::device_vector d_keys(num); + thrust::device_vector d_values(num); + thrust::device_vector d_keys_exist(num); + thrust::sequence(thrust::device, d_keys.begin(), d_keys.end(), 1); + thrust::sequence(thrust::device, d_values.begin(), d_values.end(), 1); + + auto pairs_begin = + thrust::make_zip_iterator(thrust::make_tuple(d_keys.begin(), d_values.begin())); + + map.insert(pairs_begin, pairs_begin + num); + // map should resize twice if the erased slots are successfully reused + REQUIRE(map.get_capacity() == 2 * num); + + map.erase(d_keys.begin(), d_keys.begin() + 2 * num_keys); + map.contains(d_keys.begin(), d_keys.end(), d_keys_exist.begin()); + + REQUIRE(cuco::test::none_of( + d_keys_exist.begin(), d_keys_exist.begin() + 2 * num_keys, thrust::identity{})); + + REQUIRE(cuco::test::all_of( + d_keys_exist.begin() + 2 * num_keys, d_keys_exist.end(), thrust::identity{})); + + REQUIRE(map.get_size() == 2 * num_keys); + // check that keys can be successfully deleted from all submaps (some will be unsuccessful + // erases) + map.erase(d_keys.begin(), d_keys.end()); + + map.contains(d_keys.begin(), d_keys.end(), d_keys_exist.begin()); + + REQUIRE(cuco::test::none_of(d_keys_exist.begin(), d_keys_exist.end(), thrust::identity{})); + + REQUIRE(map.get_size() == 0); + } +}