From 35abcba66868956e308d85129c7b3314e3397882 Mon Sep 17 00:00:00 2001 From: Rafik Saliev Date: Mon, 2 Feb 2026 13:43:12 +0100 Subject: [PATCH 1/7] [SVS] Implement 2-stage backend SVS index initialization Purpose: prevent long time SVS Tiered index lock at initialization time. Logic: First stage: create `svs::...::MutableVamanaIndex` instance with R/O shared lock Second stage: set `svs::...::MutableVamanaIndex` created before under R/W unique lock --- src/VecSim/algorithms/svs/svs.h | 74 +++++++++++++++++++++----- src/VecSim/algorithms/svs/svs_tiered.h | 24 +++++++-- 2 files changed, 81 insertions(+), 17 deletions(-) diff --git a/src/VecSim/algorithms/svs/svs.h b/src/VecSim/algorithms/svs/svs.h index 59b056d3c..7a8091a33 100644 --- a/src/VecSim/algorithms/svs/svs.h +++ b/src/VecSim/algorithms/svs/svs.h @@ -45,6 +45,16 @@ struct SVSIndexBase virtual size_t getThreadPoolCapacity() const = 0; virtual bool isCompressed() const = 0; size_t getNumMarkedDeleted() const { return num_marked_deleted; } + + // Abstract handler to manage SVS implementation instance + // declared to avoid unsafe unique_ptr usage + // Derived SVSIndex class should implement it + struct ImplHandler { + virtual ~ImplHandler() = default; + }; + virtual std::unique_ptr createImpl(const void *vectors_data, + const labelType *labels, size_t n) = 0; + virtual void setImpl(std::unique_ptr impl) = 0; #ifdef BUILD_TESTS virtual svs::logging::logger_ptr getLogger() const = 0; #endif @@ -144,7 +154,8 @@ class SVSIndex : public VecSimIndexAbstract, fl // Create SVS index instance with initial data // Data should not be empty template - void initImpl(const Dataset &points, std::span ids) { + std::unique_ptr initImpl(const Dataset &points, + std::span ids) const { svs::threads::ThreadPoolHandle threadpool_handle{VecSimSVSThreadPool{threadpool_}}; // Construct SVS index initial storage with compression if needed @@ -160,25 +171,26 @@ class SVSIndex : public VecSimIndexAbstract, fl // Construct initial Vamana Graph auto graph = - graph_builder_t::build_graph(parameters, data, distance, threadpool_, entry_point, + graph_builder_t::build_graph(parameters, data, distance, threadpool_handle, entry_point, this->blockSize, this->getAllocator(), logger_); // Create SVS MutableIndex instance - impl_ = std::make_unique(std::move(graph), std::move(data), entry_point, - std::move(distance), ids, threadpool_, logger_); + auto impl = std::make_unique(std::move(graph), std::move(data), entry_point, + std::move(distance), ids, threadpool_, logger_); // Set SVS MutableIndex build parameters to be used in future updates - impl_->set_construction_window_size(parameters.window_size); - impl_->set_max_candidates(parameters.max_candidate_pool_size); - impl_->set_prune_to(parameters.prune_to); - impl_->set_alpha(parameters.alpha); - impl_->set_full_search_history(parameters.use_full_search_history); + impl->set_construction_window_size(parameters.window_size); + impl->set_max_candidates(parameters.max_candidate_pool_size); + impl->set_prune_to(parameters.prune_to); + impl->set_alpha(parameters.alpha); + impl->set_full_search_history(parameters.use_full_search_history); // Configure default search parameters - auto sp = impl_->get_search_parameters(); + auto sp = impl->get_search_parameters(); sp.buffer_config({this->search_window_size, this->search_buffer_capacity}); - impl_->set_search_parameters(sp); - impl_->reset_performance_parameters(); + impl->set_search_parameters(sp); + impl->reset_performance_parameters(); + return impl; } // Preprocess batch of vectors @@ -204,6 +216,42 @@ class SVSIndex : public VecSimIndexAbstract, fl return processed_blob; } + // Handler to manage SVS implementation instance + struct SVSImplHandler : public SVSIndexBase::ImplHandler { + std::unique_ptr impl; + SVSImplHandler(std::unique_ptr impl) : impl{std::move(impl)} {} + }; + + std::unique_ptr createImpl(const void *vectors_data, const labelType *labels, + size_t n) override { + // If no data provided, return empty handler + if (n == 0) { + return std::make_unique(nullptr); + } + + std::span ids(labels, n); + auto processed_blob = this->preprocessForBatchStorage(vectors_data, n); + auto typed_vectors_data = static_cast(processed_blob.get()); + // Wrap data into SVS SimpleDataView for SVS API + auto points = svs::data::SimpleDataView{typed_vectors_data, n, this->dim}; + + return std::make_unique(initImpl(points, ids)); + } + + void setImpl(std::unique_ptr handler) override { + assert(handler); + assert(impl_ == nullptr); // Should be called only on empty impl_ + if (impl_ != nullptr) { + throw ANNEXCEPTION("SVSIndex::setImpl called on non-empty impl_"); + } + + SVSImplHandler *svs_handler = dynamic_cast(handler.get()); + if (!svs_handler) { + throw ANNEXCEPTION("Failed to cast to SVSImplHandler"); + } + this->impl_ = std::move(svs_handler->impl); + } + // Assuming numThreads was updated to reflect the number of available threads before this // function was called. // This function assumes that the caller has already set numThreads to the appropriate value @@ -230,7 +278,7 @@ class SVSIndex : public VecSimIndexAbstract, fl if (!impl_) { // SVS index instance cannot be empty, so we have to construct it at first rows - initImpl(points, ids); + impl_ = initImpl(points, ids); } else { // Add new points to existing SVS index impl_->add_points(points, ids); diff --git a/src/VecSim/algorithms/svs/svs_tiered.h b/src/VecSim/algorithms/svs/svs_tiered.h index 351122367..5f20f69b9 100644 --- a/src/VecSim/algorithms/svs/svs_tiered.h +++ b/src/VecSim/algorithms/svs/svs_tiered.h @@ -672,12 +672,28 @@ class TieredSVSIndex : public VecSimTieredIndex { executeTracingCallback("UpdateJob::before_add_to_svs"); { // lock backend index for writing and add vectors there - std::lock_guard lock(this->mainIndexGuard); + std::shared_lock main_shared_lock(this->mainIndexGuard); auto svs_index = GetSVSIndex(); assert(labels_to_move.size() == vectors_to_move.size() / this->frontendIndex->getDim()); - svs_index->setNumThreads(std::min(availableThreads, labels_to_move.size())); - svs_index->addVectors(vectors_to_move.data(), labels_to_move.data(), - labels_to_move.size()); + if (this->backendIndex->indexSize() == 0) { + // If backend index is empty, we need to initialize it first. + svs_index->setNumThreads(std::min(availableThreads, labels_to_move.size())); + auto impl = svs_index->createImpl(vectors_to_move.data(), labels_to_move.data(), + labels_to_move.size()); + + // Upgrade to unique lock to set the new impl + main_shared_lock.unlock(); + std::lock_guard lock(this->mainIndexGuard); + svs_index->setImpl(std::move(impl)); + } else { + // Backend index is initialized - just add the vectors. + main_shared_lock.unlock(); + std::lock_guard lock(this->mainIndexGuard); + // Upgrade to unique lock to add vectors + svs_index->setNumThreads(std::min(availableThreads, labels_to_move.size())); + svs_index->addVectors(vectors_to_move.data(), labels_to_move.data(), + labels_to_move.size()); + } } executeTracingCallback("UpdateJob::after_add_to_svs"); // clean-up frontend index From 45e6dca09fe27ccbf3c85f0de8e81fcf20e290fd Mon Sep 17 00:00:00 2001 From: Rafik Saliev Date: Thu, 5 Feb 2026 15:56:19 +0100 Subject: [PATCH 2/7] Avoid unique locking for deletion of non-existing vectors from backend index. --- src/VecSim/algorithms/svs/svs.h | 11 ++++++++--- src/VecSim/algorithms/svs/svs_tiered.h | 19 ++++++++++++++++--- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/VecSim/algorithms/svs/svs.h b/src/VecSim/algorithms/svs/svs.h index 7a8091a33..6fb48098a 100644 --- a/src/VecSim/algorithms/svs/svs.h +++ b/src/VecSim/algorithms/svs/svs.h @@ -39,6 +39,7 @@ struct SVSIndexBase virtual ~SVSIndexBase() = default; virtual int addVectors(const void *vectors_data, const labelType *labels, size_t n) = 0; virtual int deleteVectors(const labelType *labels, size_t n) = 0; + virtual bool isLabelExists(labelType label) const = 0; virtual size_t indexStorageSize() const = 0; virtual size_t getNumThreads() const = 0; virtual void setNumThreads(size_t numThreads) = 0; @@ -239,15 +240,15 @@ class SVSIndex : public VecSimIndexAbstract, fl } void setImpl(std::unique_ptr handler) override { - assert(handler); + assert(handler && "SVSIndex::setImpl called with null handler"); assert(impl_ == nullptr); // Should be called only on empty impl_ if (impl_ != nullptr) { - throw ANNEXCEPTION("SVSIndex::setImpl called on non-empty impl_"); + throw std::logic_error("SVSIndex::setImpl called on non-empty impl_"); } SVSImplHandler *svs_handler = dynamic_cast(handler.get()); if (!svs_handler) { - throw ANNEXCEPTION("Failed to cast to SVSImplHandler"); + throw std::logic_error("Failed to cast to SVSImplHandler"); } this->impl_ = std::move(svs_handler->impl); } @@ -538,6 +539,10 @@ class SVSIndex : public VecSimIndexAbstract, fl return deleteVectorsImpl(labels, n); } + bool isLabelExists(labelType label) const override { + return impl_ ? impl_->has_id(label) : false; + } + size_t getNumThreads() const override { return threadpool_.size(); } void setNumThreads(size_t numThreads) override { threadpool_.resize(numThreads); } diff --git a/src/VecSim/algorithms/svs/svs_tiered.h b/src/VecSim/algorithms/svs/svs_tiered.h index 5f20f69b9..f037a82f6 100644 --- a/src/VecSim/algorithms/svs/svs_tiered.h +++ b/src/VecSim/algorithms/svs/svs_tiered.h @@ -817,8 +817,15 @@ class TieredSVSIndex : public VecSimTieredIndex { } } // Remove vector from the backend index if it exists in case of non-MULTI. - std::lock_guard lock(this->mainIndexGuard); - ret -= svs_index->deleteVectors(&label, 1); + auto label_exists = [&]() { + std::shared_lock lock(this->mainIndexGuard); + return svs_index->isLabelExists(label); + }(); + + if (label_exists) { + std::lock_guard lock(this->mainIndexGuard); + ret -= svs_index->deleteVectors(&label, 1); + } } { // Add vector to the frontend index. std::lock_guard lock(this->flatIndexGuard); @@ -903,7 +910,13 @@ class TieredSVSIndex : public VecSimTieredIndex { std::lock_guard lock(this->flatIndexGuard); ret = this->deleteAndRecordSwaps_Unsafe(label); } - { + + label_exists = [&]() { + std::shared_lock lock(this->mainIndexGuard); + return svs_index->isLabelExists(label); + }(); + + if (label_exists) { std::lock_guard lock(this->mainIndexGuard); ret += svs_index->deleteVectors(&label, 1); } From adba42af16a01429423a7834e6ad6b2006fa68ad Mon Sep 17 00:00:00 2001 From: Rafik Saliev Date: Tue, 17 Feb 2026 11:44:27 +0100 Subject: [PATCH 3/7] Simplify/improve SVSIndex::deleteVector() --- src/VecSim/algorithms/svs/svs.h | 24 ++++++++++++------------ src/VecSim/algorithms/svs/svs_tiered.h | 4 ++-- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/VecSim/algorithms/svs/svs.h b/src/VecSim/algorithms/svs/svs.h index 6fb48098a..7edc0abae 100644 --- a/src/VecSim/algorithms/svs/svs.h +++ b/src/VecSim/algorithms/svs/svs.h @@ -288,6 +288,17 @@ class SVSIndex : public VecSimIndexAbstract, fl return n - deleted_num; } + int deleteVectorImpl(const labelType label) { + if (indexLabelCount() == 0 || !impl_->has_id(label)) { + return 0; + } + + const auto deleted_num = impl_->delete_entries(std::span{&label, 1}); + + this->markIndexUpdate(deleted_num); + return deleted_num; + } + int deleteVectorsImpl(const labelType *labels, size_t n) { if (indexLabelCount() == 0) { return 0; @@ -306,19 +317,8 @@ class SVSIndex : public VecSimIndexAbstract, fl return 0; } - // If entries_to_delete.size() == 1, we should ensure single-threading - const size_t current_num_threads = getNumThreads(); - if (n == 1 && current_num_threads > 1) { - setNumThreads(1); - } - const auto deleted_num = impl_->delete_entries(entries_to_delete); - // Restore multi-threading if needed - if (n == 1 && current_num_threads > 1) { - setNumThreads(current_num_threads); - } - this->markIndexUpdate(deleted_num); return deleted_num; } @@ -533,7 +533,7 @@ class SVSIndex : public VecSimIndexAbstract, fl return addVectorsImpl(vectors_data, labels, n); } - int deleteVector(labelType label) override { return deleteVectorsImpl(&label, 1); } + int deleteVector(labelType label) override { return deleteVectorImpl(label); } int deleteVectors(const labelType *labels, size_t n) override { return deleteVectorsImpl(labels, n); diff --git a/src/VecSim/algorithms/svs/svs_tiered.h b/src/VecSim/algorithms/svs/svs_tiered.h index f037a82f6..ceeece1f3 100644 --- a/src/VecSim/algorithms/svs/svs_tiered.h +++ b/src/VecSim/algorithms/svs/svs_tiered.h @@ -824,7 +824,7 @@ class TieredSVSIndex : public VecSimTieredIndex { if (label_exists) { std::lock_guard lock(this->mainIndexGuard); - ret -= svs_index->deleteVectors(&label, 1); + ret -= this->backendIndex->deleteVector(label); } } { // Add vector to the frontend index. @@ -918,7 +918,7 @@ class TieredSVSIndex : public VecSimTieredIndex { if (label_exists) { std::lock_guard lock(this->mainIndexGuard); - ret += svs_index->deleteVectors(&label, 1); + ret += this->backendIndex->deleteVector(label); } return ret; } From 8f0dd5b79ef0529546e5b6ad4687d02e6f6605d5 Mon Sep 17 00:00:00 2001 From: Rafik Saliev Date: Tue, 17 Feb 2026 13:34:48 +0100 Subject: [PATCH 4/7] Add 2-stage initialization test --- src/VecSim/algorithms/svs/svs.h | 2 - tests/unit/test_svs.cpp | 72 +++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/src/VecSim/algorithms/svs/svs.h b/src/VecSim/algorithms/svs/svs.h index 7edc0abae..8b03514f0 100644 --- a/src/VecSim/algorithms/svs/svs.h +++ b/src/VecSim/algorithms/svs/svs.h @@ -240,8 +240,6 @@ class SVSIndex : public VecSimIndexAbstract, fl } void setImpl(std::unique_ptr handler) override { - assert(handler && "SVSIndex::setImpl called with null handler"); - assert(impl_ == nullptr); // Should be called only on empty impl_ if (impl_ != nullptr) { throw std::logic_error("SVSIndex::setImpl called on non-empty impl_"); } diff --git a/tests/unit/test_svs.cpp b/tests/unit/test_svs.cpp index 102e2ec28..8dd35dd8e 100644 --- a/tests/unit/test_svs.cpp +++ b/tests/unit/test_svs.cpp @@ -277,6 +277,78 @@ TYPED_TEST(SVSTest, svs_bulk_vectors_add_delete_test) { VecSimIndex_Free(index); } +TYPED_TEST(SVSTest, two_stage_initialization_test) { + size_t n = 256; + size_t k = 11; + const size_t dim = 4; + + SVSParams params = { + .dim = dim, + .metric = VecSimMetric_L2, + /* SVS-Vamana specifics */ + .alpha = 1.2, + .graph_max_degree = 64, + .construction_window_size = 20, + .max_candidate_pool_size = 1024, + .prune_to = 60, + .use_search_history = VecSimOption_ENABLE, + }; + + VecSimIndex *index = this->CreateNewIndex(params); + ASSERT_INDEX(index); + + auto svs_index = this->CastToSVS(index); + + std::vector> v(n); + for (size_t i = 0; i < n; i++) { + GenerateVector(v[i].data(), dim, i); + } + + std::vector ids(n); + std::iota(ids.begin(), ids.end(), 0); + + // 2-stage initialization + // initialization with null should fail + EXPECT_THROW(svs_index->setImpl(nullptr), std::logic_error); + + // initialization with data should succeed + auto impl = svs_index->createImpl(v.data(), ids.data(), n); + svs_index->setImpl(std::move(impl)); + + ASSERT_EQ(VecSimIndex_IndexSize(index), n); + + TEST_DATA_T query[] = {50, 50, 50, 50}; + auto verify_res = [&](size_t id, double score, size_t index) { EXPECT_EQ(id, (index + 45)); }; + runTopKSearchTest(index, query, k, verify_res, nullptr, BY_ID); + + // Try to re-initialize with the same data. + impl = svs_index->createImpl(v.data(), ids.data(), n); + // Should fail because the index is not empty. + EXPECT_THROW(svs_index->setImpl(std::move(impl)), std::logic_error); + + // Index should remain unchanged. + ASSERT_EQ(VecSimIndex_IndexSize(index), n); + runTopKSearchTest(index, query, k, verify_res, nullptr, BY_ID); + + // Delete almost all vectors + const size_t keep_num = 1; + ASSERT_EQ(svs_index->deleteVectors(ids.data(), n - keep_num), n - keep_num); + // setImpl() should fail again because the index is not empty. + impl = svs_index->createImpl(v.data(), ids.data(), n); + EXPECT_THROW(svs_index->setImpl(std::move(impl)), std::logic_error); + + // Delete rest of the vectors - index should be empty now and setImpl() should succeed. + ASSERT_EQ(svs_index->deleteVectors(ids.data() + n - keep_num, keep_num), keep_num); + ASSERT_EQ(VecSimIndex_IndexSize(index), 0); + // Re-initialization should succeed. + impl = svs_index->createImpl(v.data(), ids.data(), n); + svs_index->setImpl(std::move(impl)); + ASSERT_EQ(VecSimIndex_IndexSize(index), n); + runTopKSearchTest(index, query, k, verify_res, nullptr, BY_ID); + + VecSimIndex_Free(index); +} + TYPED_TEST(SVSTest, svs_get_distance) { // Scalar quantization accuracy is insufficient for this test. if (this->isFallbackToSQ()) { From e270c0024722520a67c502030fb5cd3231321455 Mon Sep 17 00:00:00 2001 From: Rafik Saliev Date: Wed, 18 Feb 2026 11:25:37 +0100 Subject: [PATCH 5/7] Resolve "undoing the delete" issue --- src/VecSim/algorithms/svs/svs_tiered.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/VecSim/algorithms/svs/svs_tiered.h b/src/VecSim/algorithms/svs/svs_tiered.h index ceeece1f3..4e0477b8d 100644 --- a/src/VecSim/algorithms/svs/svs_tiered.h +++ b/src/VecSim/algorithms/svs/svs_tiered.h @@ -218,6 +218,10 @@ class TieredSVSIndex : public VecSimTieredIndex { using swap_record = std::tuple; constexpr static size_t SKIP_LABEL = std::numeric_limits::max(); std::vector swaps_journal; + // deleted_labels_journal is used by updateSVSIndex() to track vectors that were deleted from + // Flat index during SVS index updating. The journal contains the deleted labels. These labels + // are used to delete the same vectors from the SVS index at the end of the update. + std::vector deleted_labels_journal; size_t trainingTriggerThreshold; size_t updateTriggerThreshold; @@ -668,6 +672,7 @@ class TieredSVSIndex : public VecSimTieredIndex { } // reset journal to the current frontend index state swaps_journal.clear(); + deleted_labels_journal.clear(); } // release frontend index executeTracingCallback("UpdateJob::before_add_to_svs"); @@ -700,6 +705,18 @@ class TieredSVSIndex : public VecSimTieredIndex { { // lock frontend index for writing and delete moved vectors std::lock_guard lock(this->flatIndexGuard); + // delete vectors from backend index that were deleted from the frontend index during + // the update process. + std::sort(deleted_labels_journal.begin(), deleted_labels_journal.end()); + auto it = std::unique(deleted_labels_journal.begin(), deleted_labels_journal.end()); + deleted_labels_journal.erase(it, deleted_labels_journal.end()); + { + std::lock_guard main_lock(this->mainIndexGuard); + auto svs_index = GetSVSIndex(); + svs_index->deleteVectors(deleted_labels_journal.data(), + deleted_labels_journal.size()); + } + // Apply swaps from journal to labels_to_move to reflect changes made in meanwhile. applySwapsToLabelsArray(labels_to_move, this->swaps_journal); @@ -837,6 +854,7 @@ class TieredSVSIndex : public VecSimTieredIndex { for (auto id : this->frontendIndex->getElementIds(label)) { this->swaps_journal.emplace_back(SKIP_LABEL, id, id); } + deleted_labels_journal.push_back(label); } ret = std::max(ret + ft_ret, 0); // Check frontend index size to determine if an update job schedule is needed. @@ -891,6 +909,7 @@ class TieredSVSIndex : public VecSimTieredIndex { this->swaps_journal.emplace_back(SKIP_LABEL, id, id); } } + deleted_labels_journal.push_back(label); return deleting_ids.size(); } From 9862f954fe2b938eaa0e53dc11d8cb4499eb3b8a Mon Sep 17 00:00:00 2001 From: Rafik Saliev Date: Wed, 18 Feb 2026 14:49:37 +0100 Subject: [PATCH 6/7] Fix lock ordering inversion issue --- src/VecSim/algorithms/svs/svs_tiered.h | 28 ++++++++++++++++---------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/VecSim/algorithms/svs/svs_tiered.h b/src/VecSim/algorithms/svs/svs_tiered.h index 4e0477b8d..2eac66d24 100644 --- a/src/VecSim/algorithms/svs/svs_tiered.h +++ b/src/VecSim/algorithms/svs/svs_tiered.h @@ -655,6 +655,7 @@ class TieredSVSIndex : public VecSimTieredIndex { void updateSVSIndex(size_t availableThreads) { std::vector labels_to_move; std::vector vectors_to_move; + std::vector deleted_labels_during_update; { // lock frontendIndex from modifications std::shared_lock flat_lock{this->flatIndexGuard}; @@ -705,17 +706,9 @@ class TieredSVSIndex : public VecSimTieredIndex { { // lock frontend index for writing and delete moved vectors std::lock_guard lock(this->flatIndexGuard); - // delete vectors from backend index that were deleted from the frontend index during - // the update process. - std::sort(deleted_labels_journal.begin(), deleted_labels_journal.end()); - auto it = std::unique(deleted_labels_journal.begin(), deleted_labels_journal.end()); - deleted_labels_journal.erase(it, deleted_labels_journal.end()); - { - std::lock_guard main_lock(this->mainIndexGuard); - auto svs_index = GetSVSIndex(); - svs_index->deleteVectors(deleted_labels_journal.data(), - deleted_labels_journal.size()); - } + // swap deleted labels journal with the local variable to track deleted labels during + // update + std::swap(deleted_labels_during_update, deleted_labels_journal); // Apply swaps from journal to labels_to_move to reflect changes made in meanwhile. applySwapsToLabelsArray(labels_to_move, this->swaps_journal); @@ -736,6 +729,19 @@ class TieredSVSIndex : public VecSimTieredIndex { [](labelType label) { return label != SKIP_LABEL; }) && "Deleted vectors count does not match the number of labels to delete"); } + // delete vectors from backend index that were deleted from the frontend index during + // the update process. + { + std::lock_guard main_lock(this->mainIndexGuard); + + std::sort(deleted_labels_during_update.begin(), deleted_labels_during_update.end()); + auto it = std::unique(deleted_labels_during_update.begin(), + deleted_labels_during_update.end()); + deleted_labels_during_update.erase(it, deleted_labels_during_update.end()); + auto svs_index = GetSVSIndex(); + svs_index->deleteVectors(deleted_labels_during_update.data(), + deleted_labels_during_update.size()); + } } public: From 2a3d44c188286805f62fe0a3d474fee90b3a2c40 Mon Sep 17 00:00:00 2001 From: Rafik Saliev Date: Thu, 19 Feb 2026 12:27:35 +0100 Subject: [PATCH 7/7] Add test for deleted_labels_journal --- tests/unit/test_svs_tiered.cpp | 206 +++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) diff --git a/tests/unit/test_svs_tiered.cpp b/tests/unit/test_svs_tiered.cpp index f644a84bf..3d9c0bd0d 100644 --- a/tests/unit/test_svs_tiered.cpp +++ b/tests/unit/test_svs_tiered.cpp @@ -3616,6 +3616,212 @@ TYPED_TEST(SVSTieredIndexTestBasic, testSwapJournalMulti) { ASSERT_NEAR(tiered_index->getDistanceFrom_Unsafe(n + 1, expected_vector), 0, abs_err); } +TYPED_TEST(SVSTieredIndexTestBasic, testDeletedJournalSingle) { + // Create TieredSVS index instance with a mock queue. + const size_t dim = 4; + const size_t n = 15; + + SVSParams params = { + .type = TypeParam::get_index_type(), .dim = dim, .metric = VecSimMetric_L2, .multi = false}; + VecSimParams svs_params = CreateParams(params); + auto mock_thread_pool = tieredIndexMock(); + + auto *tiered_index = this->CreateTieredSVSIndex(svs_params, mock_thread_pool, n, n * 100); + ASSERT_INDEX(tiered_index); + auto allocator = tiered_index->getAllocator(); + + // Add n vectors to the index. + for (size_t i = 0; i < n; i++) { + TEST_DATA_T vector[dim]; + GenerateVector(vector, dim, i); + tiered_index->addVector(vector, i); + } + + // Pause the update job before svs index update + std::mutex mtx; + bool adding_to_svs = false; + bool continue_job = false; + std::condition_variable cv; + + auto tracing_callback = [&]() { + { + std::unique_lock lock(mtx); + adding_to_svs = true; // Indicate that we are waiting for the update job to start. + } + cv.notify_one(); // Notify that the update job has started. + { + std::unique_lock lock(mtx); + cv.wait(lock, [&] { return continue_job; }); // Wait until we continue. + } + }; + tiered_index->registerTracingCallback("UpdateJob::before_add_to_svs", tracing_callback); + + // Launch the BG threads loop that takes jobs from the queue and executes them. + mock_thread_pool.init_threads(); + + { + // IMPORTANT!: Do not use ASSERT here, as it will not release the mutex and will cause a + // deadlock. Use EXPECT instead, so we can continue the test even if the condition is not + // met. + + // Wait for the update job to start. + std::unique_lock lock(mtx); + EXPECT_TRUE(cv.wait_for(lock, std::chrono::seconds(100), [&] { return adding_to_svs; })); + + // update job paused, we have vectors 0-(n-1) in the index, let's do index modifications + + // Remove vector label=0. + EXPECT_EQ(VecSimIndex_DeleteVector(tiered_index, 0), 1); + // Update vector label=1. + EXPECT_EQ(GenerateAndAddVector(tiered_index, dim, 1, 10), 0); + // Update vector label=2. + EXPECT_EQ(GenerateAndAddVector(tiered_index, dim, 2, 20), 0); + // Remove vector label=2. + EXPECT_EQ(VecSimIndex_DeleteVector(tiered_index, 2), 1); + // Remove the last vector + EXPECT_EQ(VecSimIndex_DeleteVector(tiered_index, n - 1), 1); + // Add a new vector - in flat only + EXPECT_EQ(GenerateAndAddVector(tiered_index, dim, n, n), 1); + // Remove vector label=n - in flat only + EXPECT_EQ(VecSimIndex_DeleteVector(tiered_index, n), 1); + + continue_job = true; // Indicate that we can continue the update job. + } + + // continue the update job + cv.notify_all(); // Notify that we can continue. + + mock_thread_pool.thread_pool_join(); + + // Verify that vectors labels: {0, 1, 2, n-1} are marked as deleted in the SVS index. + ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), 4); + + // Verify that the deleted vectors are not accessible. + double abs_err = 1e-2; // Allow a larger relative error for quantization. + TEST_DATA_T expected_vector[dim]; + auto svs_index = tiered_index->GetSVSIndex(); + auto backend_index = tiered_index->GetBackendIndex(); + auto flat_index = tiered_index->GetFlatIndex(); + + // labels 0, 2, n-1, n - deleted everywhere + for (size_t label : {size_t{0}, size_t{2}, n - 1, n}) { + ASSERT_FALSE(flat_index->isLabelExists(label)); + ASSERT_FALSE(svs_index->isLabelExists(label)); + GenerateVector(expected_vector, dim); + ASSERT_TRUE(std::isnan(tiered_index->getDistanceFrom_Unsafe(label, expected_vector))); + } + + // label 1 - updated to 10 but deleted in the SVS index + ASSERT_TRUE(flat_index->isLabelExists(1)); + ASSERT_FALSE(svs_index->isLabelExists(1)); + GenerateVector(expected_vector, dim, 10); + ASSERT_NEAR(tiered_index->getDistanceFrom_Unsafe(1, expected_vector), 0, abs_err); +} + +TYPED_TEST(SVSTieredIndexTestBasic, testDeletedJournalMulti) { + // Create TieredSVS index instance with a mock queue. + const size_t dim = 4; + const size_t n = 15; + + SVSParams params = { + .type = TypeParam::get_index_type(), .dim = dim, .metric = VecSimMetric_L2, .multi = true}; + VecSimParams svs_params = CreateParams(params); + auto mock_thread_pool = tieredIndexMock(); + + auto *tiered_index = this->CreateTieredSVSIndex(svs_params, mock_thread_pool, n, n * 100); + ASSERT_INDEX(tiered_index); + auto allocator = tiered_index->getAllocator(); + + // Add n vectors to the index. + for (size_t i = 0; i < n; i++) { + TEST_DATA_T vector[dim]; + GenerateVector(vector, dim, i); + tiered_index->addVector(vector, i); + } + + // Pause the update job before svs index update + std::mutex mtx; + bool adding_to_svs = false; + bool continue_job = false; + std::condition_variable cv; + + auto tracing_callback = [&]() { + { + std::unique_lock lock(mtx); + adding_to_svs = true; // Indicate that we are waiting for the update job to start. + } + cv.notify_one(); // Notify that the update job has started. + { + std::unique_lock lock(mtx); + cv.wait(lock, [&] { return continue_job; }); // Wait until we continue. + } + }; + tiered_index->registerTracingCallback("UpdateJob::before_add_to_svs", tracing_callback); + + // Launch the BG threads loop that takes jobs from the queue and executes them. + mock_thread_pool.init_threads(); + + { + // IMPORTANT!: Do not use ASSERT here, as it will not release the mutex and will cause a + // deadlock. Use EXPECT instead, so we can continue the test even if the condition is not + // met. + + // Wait for the update job to start. + std::unique_lock lock(mtx); + EXPECT_TRUE(cv.wait_for(lock, std::chrono::seconds(100), [&] { return adding_to_svs; })); + + // update job paused, we have vectors 0-(n-1) in the index, let's do index modifications + + // Remove vector label=0. + EXPECT_EQ(VecSimIndex_DeleteVector(tiered_index, 0), 1); + // Remove the last vector + EXPECT_EQ(VecSimIndex_DeleteVector(tiered_index, n - 1), 1); + // Update vector label=2. + EXPECT_EQ(GenerateAndAddVector(tiered_index, dim, 2, 20), 1); + // Remove vector label=2. + EXPECT_EQ(VecSimIndex_DeleteVector(tiered_index, 2), 2); + // Update vector label=1. + EXPECT_EQ(GenerateAndAddVector(tiered_index, dim, 1, 10), 1); + // Add a new vector - in flat only + EXPECT_EQ(GenerateAndAddVector(tiered_index, dim, n, n), 1); + // Remove vector label=n - in flat only + EXPECT_EQ(VecSimIndex_DeleteVector(tiered_index, n), 1); + + continue_job = true; // Indicate that we can continue the update job. + } + + // continue the update job + cv.notify_all(); // Notify that we can continue. + + mock_thread_pool.thread_pool_join(); + + // Verify that vectors labels: {0, 2, n-1} are marked as deleted in the SVS index. + ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), 3); + + // Verify that the deleted vectors are not accessible. + double abs_err = 1e-2; // Allow a larger relative error for quantization. + TEST_DATA_T expected_vector[dim]; + auto svs_index = tiered_index->GetSVSIndex(); + auto backend_index = tiered_index->GetBackendIndex(); + auto flat_index = tiered_index->GetFlatIndex(); + + // labels 0, 2, n-1, n - deleted everywhere + for (size_t label : {size_t{0}, size_t{2}, n - 1, n}) { + ASSERT_FALSE(flat_index->isLabelExists(label)); + ASSERT_FALSE(svs_index->isLabelExists(label)); + GenerateVector(expected_vector, dim); + ASSERT_TRUE(std::isnan(tiered_index->getDistanceFrom_Unsafe(label, expected_vector))); + } + + // label 1 - multi-value 1 (in SVS) and 10 (in flat) + ASSERT_TRUE(flat_index->isLabelExists(1)); + ASSERT_TRUE(svs_index->isLabelExists(1)); + GenerateVector(expected_vector, dim, 1); + ASSERT_NEAR(backend_index->getDistanceFrom_Unsafe(1, expected_vector), 0, abs_err); + GenerateVector(expected_vector, dim, 10); + ASSERT_NEAR(flat_index->getDistanceFrom_Unsafe(1, expected_vector), 0, abs_err); +} + TEST(SVSTieredIndexTest, testThreadPool) { // Test VecSimSVSThreadPool const size_t num_threads = 4;