Runtime bindings 1.0.0: add the v1 API namespace (re-exporting v0 types), add
v1::LeanVecTrainingData with optional out-of-distribution training queries,
and let tools/clang-format.sh format staged files only.

NOTE(review): context indentation and blank context lines were reconstructed
from the hunk line counts; use `git apply --ignore-whitespace` if context does
not match byte-for-byte.
NOTE(review): both SVS_URL cache defaults become empty strings in this patch;
presumably the FetchContent step that follows needs -DSVS_URL=... to be passed
explicitly -- confirm this is intended.

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2601df03a..1048adc29 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -18,7 +18,7 @@ repos:
       - id: clang-format
         name: clang-format
         language: python
-        entry: tools/clang-format.sh
+        entry: tools/clang-format.sh clang-format true
         args: [clang-format]
         additional_dependencies: ["clang-format>=15,<16"]
 
diff --git a/bindings/cpp/CMakeLists.txt b/bindings/cpp/CMakeLists.txt
index 122e5c8f7..7664fe446 100644
--- a/bindings/cpp/CMakeLists.txt
+++ b/bindings/cpp/CMakeLists.txt
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 cmake_minimum_required(VERSION 3.21)
-project(svs_runtime VERSION 0.1.0 LANGUAGES CXX)
+project(svs_runtime VERSION 1.0.0 LANGUAGES CXX)
 set(TARGET_NAME svs_runtime)
 
 set(SVS_RUNTIME_HEADERS
@@ -98,14 +98,14 @@ if (SVS_RUNTIME_ENABLE_LVQ_LEANVEC)
 else()
     # Links to LTO-enabled static library, requires GCC/G++ 11.2
     if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "11.2" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "11.3")
-        set(SVS_URL "https://github.com/intel/ScalableVectorSearch/releases/download/v1.0.0-dev/svs-shared-library-1.0.0-NIGHTLY-20251119-807-lto.tar.gz"
+        set(SVS_URL ""
             CACHE STRING "URL to download SVS shared library")
     else()
         message(WARNING
             "Pre-built LVQ/LeanVec SVS library requires GCC/G++ v.11.2 to apply LTO optimizations."
             "Current compiler: ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}"
         )
-        set(SVS_URL "https://github.com/intel/ScalableVectorSearch/releases/download/v0.0.11/svs-shared-library-0.0.11.tar.gz"
+        set(SVS_URL ""
             CACHE STRING "URL to download SVS shared library")
     endif()
     include(FetchContent)
diff --git a/bindings/cpp/include/svs/runtime/api_defs.h b/bindings/cpp/include/svs/runtime/api_defs.h
index f77df9d78..940021ac0 100644
--- a/bindings/cpp/include/svs/runtime/api_defs.h
+++ b/bindings/cpp/include/svs/runtime/api_defs.h
@@ -208,5 +208,21 @@ struct SVS_RUNTIME_API_INTERFACE ResultsAllocator {
 };
 
 } // namespace v0
+
+namespace v1 {
+using v0::ErrorCode;
+using v0::IDFilter;
+using v0::is_specified;
+using v0::MetricType;
+using v0::OptionalBool;
+using v0::ResultsAllocator;
+using v0::SearchResultsStorage;
+using v0::set_if_specified;
+using v0::Status;
+using v0::Status_Ok;
+using v0::StorageKind;
+using v0::Unspecify;
+} // namespace v1
+
 } // namespace runtime
 } // namespace svs
diff --git a/bindings/cpp/include/svs/runtime/dynamic_vamana_index.h b/bindings/cpp/include/svs/runtime/dynamic_vamana_index.h
index 97596b3af..4b9b239aa 100644
--- a/bindings/cpp/include/svs/runtime/dynamic_vamana_index.h
+++ b/bindings/cpp/include/svs/runtime/dynamic_vamana_index.h
@@ -125,5 +125,11 @@ struct SVS_RUNTIME_API DynamicVamanaIndexLeanVec : public DynamicVamanaIndex {
 };
 
 } // namespace v0
+
+namespace v1 {
+using v0::DynamicVamanaIndex;
+using v0::DynamicVamanaIndexLeanVec;
+} // namespace v1
+
 } // namespace runtime
 } // namespace svs
diff --git a/bindings/cpp/include/svs/runtime/flat_index.h b/bindings/cpp/include/svs/runtime/flat_index.h
index 9c635e35d..c956fcdf1 100644
--- a/bindings/cpp/include/svs/runtime/flat_index.h
+++ b/bindings/cpp/include/svs/runtime/flat_index.h
@@ -47,5 +47,10 @@ struct SVS_RUNTIME_API FlatIndex {
 };
 
 } // namespace v0
+
+namespace v1 {
+using v0::FlatIndex;
+} // namespace v1
+
 } // namespace runtime
 } // namespace svs
diff --git a/bindings/cpp/include/svs/runtime/training.h b/bindings/cpp/include/svs/runtime/training.h
index 08b2e36d9..799a74604 100644
--- a/bindings/cpp/include/svs/runtime/training.h
+++ b/bindings/cpp/include/svs/runtime/training.h
@@ -27,6 +27,15 @@ namespace v0 {
 
 struct SVS_RUNTIME_API LeanVecTrainingData {
     virtual ~LeanVecTrainingData();
+
+    /* Build LeanVec training data (compression matrices) from the provided
+     * data.
+     * @param training_data Output parameter to the created training data object
+     * @param dim Dimensionality of the input data and queries
+     * @param n Number of data points and queries
+     * @param x Pointer to the input data
+     * @param leanvec_dims Number of dimensions in the resulting LeanVec data
+     */
     static Status build(
         LeanVecTrainingData** training_data,
         size_t dim,
@@ -42,5 +51,37 @@ struct SVS_RUNTIME_API LeanVecTrainingData {
 };
 
 } // namespace v0
+
+namespace v1 {
+
+struct SVS_RUNTIME_API LeanVecTrainingData : public v0::LeanVecTrainingData {
+    using v0::LeanVecTrainingData::destroy;
+    using v0::LeanVecTrainingData::save;
+
+    /* Build LeanVec training data (compression matrices) from the provided
+     * data.
+     * Accepts optional training queries for out-of-distribution training.
+     * @param training_data Output parameter to the created training data object
+     * @param dim Dimensionality of the input data and queries
+     * @param n Number of data points
+     * @param x Pointer to the input data
+     * @param n_train Number of training queries (0 selects in-distribution training)
+     * @param q Pointer to the training queries (nullptr selects in-distribution training)
+     * @param leanvec_dims Number of dimensions in the resulting LeanVec data
+     */
+    static Status build(
+        LeanVecTrainingData** training_data,
+        size_t dim,
+        size_t n,
+        const float* x,
+        size_t n_train,
+        const float* q,
+        size_t leanvec_dims
+    ) noexcept;
+
+    static Status load(LeanVecTrainingData** training_data, std::istream& in) noexcept;
+};
+
+} // namespace v1
 } // namespace runtime
 } // namespace svs
diff --git a/bindings/cpp/include/svs/runtime/vamana_index.h b/bindings/cpp/include/svs/runtime/vamana_index.h
index ba9739fb4..4b50fb40c 100644
--- a/bindings/cpp/include/svs/runtime/vamana_index.h
+++ b/bindings/cpp/include/svs/runtime/vamana_index.h
@@ -69,5 +69,10 @@ struct SVS_RUNTIME_API VamanaIndex {
 };
 
 } // namespace v0
+
+namespace v1 {
+using v0::VamanaIndex;
+} // namespace v1
+
 } // namespace runtime
 } // namespace svs
diff --git a/bindings/cpp/include/svs/runtime/version.h b/bindings/cpp/include/svs/runtime/version.h
index 11aa5ed32..184656383 100644
--- a/bindings/cpp/include/svs/runtime/version.h
+++ b/bindings/cpp/include/svs/runtime/version.h
@@ -36,12 +36,12 @@
 #ifndef SVS_RUNTIME_VERSION_MAJOR
 /// Major version number - incremented for breaking API changes
 /// When this changes, a new version namespace (e.g., v0 -> v1) is created
-#define SVS_RUNTIME_VERSION_MAJOR 0
+#define SVS_RUNTIME_VERSION_MAJOR 1
 #endif
 
 #ifndef SVS_RUNTIME_VERSION_MINOR
 /// Minor version number - incremented for backward-compatible feature additions
-#define SVS_RUNTIME_VERSION_MINOR 1
+#define SVS_RUNTIME_VERSION_MINOR 0
 #endif
 
 #ifndef SVS_RUNTIME_VERSION_PATCH
@@ -51,7 +51,7 @@
 
 #ifndef SVS_RUNTIME_VERSION_STRING
 /// Complete version string
-#define SVS_RUNTIME_VERSION_STRING "0.1.0"
+#define SVS_RUNTIME_VERSION_STRING "1.0.0"
 #endif
 
 #ifndef SVS_RUNTIME_API_VERSION
@@ -59,13 +59,27 @@
 #define SVS_RUNTIME_API_VERSION SVS_RUNTIME_VERSION_MAJOR
 #endif
 
-#if (SVS_RUNTIME_API_VERSION == 0)
+#if (SVS_RUNTIME_API_VERSION == 1)
+/// Use v1 API
+/// API version namespace (v1)
+#define SVS_RUNTIME_CURRENT_API_NAMESPACE v1
+namespace svs {
+namespace runtime {
+namespace v0 {}
+/// All public runtime APIs live here and are accessible as svs::runtime::FunctionName
+/// due to inline namespace
+inline namespace v1 {
+// Public runtime APIs will be defined in their respective headers
+// IMPORTANT: include this header before other runtime headers to ensure proper versioning
+}
+} // namespace runtime
+} // namespace svs
+#elif (SVS_RUNTIME_API_VERSION == 0)
 /// Use v0 API
-/// Current API version namespace
+/// API version namespace (v0)
 #define SVS_RUNTIME_CURRENT_API_NAMESPACE v0
 namespace svs {
 namespace runtime {
-/// Current API version namespace (v0)
 /// All public runtime APIs live here and are accessible as svs::runtime::FunctionName
 /// due to inline namespace
 inline namespace v0 {
diff --git a/bindings/cpp/src/training.cpp b/bindings/cpp/src/training.cpp
index 77309af3d..85e2aa2ae 100644
--- a/bindings/cpp/src/training.cpp
+++ b/bindings/cpp/src/training.cpp
@@ -24,6 +24,8 @@
 
 namespace svs {
 namespace runtime {
+namespace v0 {
+
 LeanVecTrainingData::~LeanVecTrainingData() = default;
 
 Status LeanVecTrainingData::build(
@@ -50,13 +52,56 @@ LeanVecTrainingData::load(LeanVecTrainingData** training_data, std::istream& in)
         *training_data = new LeanVecTrainingDataManager{LeanVecTrainingDataImpl::load(in)};
     });
 }
+
+} // namespace v0
+
+namespace v1 {
+
+Status LeanVecTrainingData::build(
+    LeanVecTrainingData** training_data,
+    size_t dim,
+    size_t n,
+    const float* x,
+    size_t n_train,
+    const float* q,
+    size_t leanvec_dims
+) noexcept {
+    return runtime_error_wrapper([&] {
+        const auto data = svs::data::ConstSimpleDataView<float>(x, n, dim);
+        if (!q || n_train == 0) {
+            // In-distribution (ID) training: no usable query set was supplied
+            *training_data =
+                new LeanVecTrainingDataManager{LeanVecTrainingDataImpl{data, leanvec_dims}};
+        } else {
+            // Out-of-distribution (OOD) training using the supplied queries
+            const auto queries = svs::data::ConstSimpleDataView<float>(q, n_train, dim);
+            *training_data = new LeanVecTrainingDataManager{
+                LeanVecTrainingDataImpl{data, queries, leanvec_dims}};
+        }
+    });
+}
+
+Status
+LeanVecTrainingData::load(LeanVecTrainingData** training_data, std::istream& in) noexcept {
+    v0::LeanVecTrainingData* ptr = nullptr;
+    auto status = v0::LeanVecTrainingData::load(&ptr, in);
+    // Downcast is safe: v0::load only creates LeanVecTrainingDataManager, which derives from v1
+    *training_data = static_cast<LeanVecTrainingData*>(ptr);
+    return status;
+}
+
+} // namespace v1
+
 } // namespace runtime
 } // namespace svs
 
 #else // SVS_LEANVEC_HEADER
 namespace svs {
 namespace runtime {
+
+namespace v0 {
 LeanVecTrainingData::~LeanVecTrainingData() = default;
+
 Status LeanVecTrainingData::build(
     LeanVecTrainingData** SVS_UNUSED(training_data),
     size_t SVS_UNUSED(dim),
@@ -69,6 +114,7 @@ Status LeanVecTrainingData::build(
         "LeanVecTrainingData is not supported in this build configuration."
     );
 }
+
 Status LeanVecTrainingData::destroy(LeanVecTrainingData* SVS_UNUSED(training_data)
 ) noexcept {
     return Status(
@@ -76,6 +122,33 @@ Status LeanVecTrainingData::destroy(LeanVecTrainingData* SVS_UNUSED(training_dat
         "LeanVecTrainingData is not supported in this build configuration."
     );
 }
+
+Status LeanVecTrainingData::load(
+    LeanVecTrainingData** SVS_UNUSED(training_data), std::istream& SVS_UNUSED(in)
+) noexcept {
+    return Status(
+        ErrorCode::NOT_IMPLEMENTED,
+        "LeanVecTrainingData is not supported in this build configuration."
+    );
+}
+} // namespace v0
+
+namespace v1 {
+Status LeanVecTrainingData::build(
+    LeanVecTrainingData** SVS_UNUSED(training_data),
+    size_t SVS_UNUSED(dim),
+    size_t SVS_UNUSED(n),
+    const float* SVS_UNUSED(x),
+    size_t SVS_UNUSED(n_train),
+    const float* SVS_UNUSED(q),
+    size_t SVS_UNUSED(leanvec_dims)
+) noexcept {
+    return Status(
+        ErrorCode::NOT_IMPLEMENTED,
+        "LeanVecTrainingData is not supported in this build configuration."
+    );
+}
+
 Status LeanVecTrainingData::load(
     LeanVecTrainingData** SVS_UNUSED(training_data), std::istream& SVS_UNUSED(in)
 ) noexcept {
@@ -84,6 +157,8 @@ Status LeanVecTrainingData::load(
         "LeanVecTrainingData is not supported in this build configuration."
     );
 }
+} // namespace v1
+
 } // namespace runtime
 } // namespace svs
 #endif // SVS_LEANVEC_HEADER
diff --git a/bindings/cpp/src/training_impl.h b/bindings/cpp/src/training_impl.h
index 14b7b41b3..d15a48473 100644
--- a/bindings/cpp/src/training_impl.h
+++ b/bindings/cpp/src/training_impl.h
@@ -45,6 +45,14 @@ struct LeanVecTrainingDataImpl {
         : leanvec_dims_{leanvec_dims}
         , leanvec_matrices_{compute_leanvec_matrices(data, leanvec_dims)} {}
 
+    LeanVecTrainingDataImpl(
+        const svs::data::ConstSimpleDataView<float>& data,
+        const svs::data::ConstSimpleDataView<float>& queries,
+        size_t leanvec_dims
+    )
+        : leanvec_dims_{leanvec_dims}
+        , leanvec_matrices_{compute_leanvec_matrices_ood(data, queries, leanvec_dims)} {}
+
     size_t get_leanvec_dims() const { return leanvec_dims_; }
     const LeanVecMatricesType& get_leanvec_matrices() const { return leanvec_matrices_; }
 
@@ -83,9 +91,19 @@ struct LeanVecTrainingDataImpl {
         // leading explicit creation of a copy of the matrix "to avoid double free".
         return LeanVecMatricesType{std::move(matrix), std::move(query_matrix)};
     }
+
+    static LeanVecMatricesType compute_leanvec_matrices_ood(
+        const svs::data::ConstSimpleDataView<float>& data,
+        const svs::data::ConstSimpleDataView<float>& queries,
+        size_t leanvec_dims
+    ) {
+        return svs::leanvec::compute_leanvec_matrices_ood(
+            data, queries, svs::lib::MaybeStatic{leanvec_dims}
+        );
+    }
 };
 
-struct LeanVecTrainingDataManager : public svs::runtime::LeanVecTrainingData {
+struct LeanVecTrainingDataManager : public svs::runtime::v1::LeanVecTrainingData {
     LeanVecTrainingDataManager(LeanVecTrainingDataImpl impl)
         : impl_{std::move(impl)} {}
 
diff --git a/tools/clang-format.sh b/tools/clang-format.sh
index f87a7cdde..96e2fd0db 100755
--- a/tools/clang-format.sh
+++ b/tools/clang-format.sh
@@ -16,9 +16,14 @@
 
 # Allow users to supply a custom path to `clang-format`
 CLANGFORMAT="${1:-clang-format}"
+STAGED_ONLY="${2:-false}"
 DIRECTORIES=( "bindings/python/src" "bindings/python/include" "bindings/cpp" "include" "benchmark" "tests" "utils" "examples/cpp" )
 
 for i in "${DIRECTORIES[@]}"
 do
-    find "./$i" \( -iname "*.h" -o -iname "*.cpp" \) ! -iname "*toml_impl.h" | xargs "$CLANGFORMAT" -i
+    if [[ "$STAGED_ONLY" == "true" ]]; then
+        git diff --cached --name-only -z --diff-filter=ACM | grep -zE "^$i/.*\.(h|cpp)$" | grep -zv 'toml_impl\.h$' | xargs -0 -r "$CLANGFORMAT" -i
+    else
+        find "./$i" \( -iname "*.h" -o -iname "*.cpp" \) ! -iname "*toml_impl.h" -print0 | xargs -n1 -0 "$CLANGFORMAT" -i
+    fi
 done