A few small fixes for multi-gpu PCA/TruncatedSVD (#7560)

jcrist · web-flow · commit ebb8e803c89c · 2025-12-03T15:25:16.000Z
- Don't branch the sign-flipping behavior on the installed version of sklearn. This somehow slipped through in #7331. We always want `cuml` to behave the same regardless of the sklearn version; the only place we branch is in the tests, where we don't assert that signs match for sklearn < 1.5 (this matches the single-gpu testing strategy as well).
- Add a sync point in multi-gpu PCA before calling `signFlipComponents`. The multi-gpu implementation makes use of multiple streams, but previously only the first stream was passed to `signFlipComponents` (without any sync beforehand), leading to potential stream-ordering issues. It's hard to prove a negative, but with this change I can no longer reproduce an issue reported in `rapids_singlecell`. A similar fix isn't needed for `TruncatedSVD`, since that implementation only uses one stream.

Authors:
- Jim Crist-Harif (https://github.com/jcrist)

Approvers:
- Simon Adorf (https://github.com/csadorf)

URL: #7560
diff --git a/cpp/src/pca/pca_mg.cu b/cpp/src/pca/pca_mg.cu
@@ -78,6 +78,9 @@ void fit_impl(raft::handle_t& handle,
                            n_streams,
                            true);
   } else {
+    for (std::uint32_t i = 0; i < n_streams; i++) {
+      handle.sync_stream(streams[i]);
+    }
     signFlipComponents(handle,
                        input_data[0]->ptr,
                        components,
@@ -191,6 +194,9 @@ void fit_impl(raft::handle_t& handle,
                              n_streams,
                              true);
     } else {
+      for (std::uint32_t i = 0; i < n_streams; i++) {
+        handle.sync_stream(streams[i]);
+      }
       signFlipComponents(h,
                          input_data[0]->ptr,
                          vMatrix.data(),
diff --git a/python/cuml/cuml/decomposition/pca_mg.pyx b/python/cuml/cuml/decomposition/pca_mg.pyx
@@ -2,10 +2,7 @@
 # SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 #
-
 import numpy as np
-import sklearn
-from packaging.version import Version
 
 import cuml.internals
 from cuml.decomposition import PCA
@@ -94,7 +91,7 @@ class PCAMG(BaseDecompositionMG, PCA):
         cdef uintptr_t noise_variance_ptr = noise_variance.ptr
         cdef bool use_float32 = (dtype == np.float32)
         cdef handle_t* handle_ = <handle_t*><size_t>self.handle.getHandle()
-        cdef bool flip_signs_based_on_U = (Version(sklearn.__version__) < Version("1.5.0"))
+        cdef bool flip_signs_based_on_U = self._u_based_sign_flip
 
         # Perform fit
         with nogil:
diff --git a/python/cuml/cuml/decomposition/tsvd_mg.pyx b/python/cuml/cuml/decomposition/tsvd_mg.pyx
@@ -2,10 +2,7 @@
 # SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 #
-
 import numpy as np
-import sklearn
-from packaging.version import Version
 
 import cuml.internals
 from cuml.decomposition import TruncatedSVD
@@ -100,7 +97,7 @@ class TSVDMG(BaseDecompositionMG, TruncatedSVD):
         cdef uintptr_t singular_values_ptr = singular_values.ptr
         cdef bool use_float32 = dtype == np.float32
         cdef handle_t* handle_ = <handle_t*><size_t>self.handle.getHandle()
-        cdef bool flip_signs_based_on_U = (Version(sklearn.__version__) < Version("1.5.0"))
+        cdef bool flip_signs_based_on_U = self._u_based_sign_flip
 
         # Perform Fit
         with nogil:
diff --git a/python/cuml/tests/dask/test_dask_pca.py b/python/cuml/tests/dask/test_dask_pca.py
@@ -1,13 +1,16 @@
 # SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 #
-
 import cupy as cp
 import numpy as np
 import pytest
+import sklearn
+from packaging.version import Version
 
 from cuml.dask.common.dask_arr_utils import to_dask_cudf
 
+SKLEARN_GE_1_5_0 = Version(sklearn.__version__) >= Version("1.5.0")
+
 
 @pytest.mark.mg
 @pytest.mark.parametrize("nrows", [1000])
@@ -56,11 +59,12 @@ def test_pca_fit(nrows, ncols, n_parts, input_type, client):
     ]
 
     for attr in all_attr:
+        with_sign = SKLEARN_GE_1_5_0 if attr == "components_" else True
         cuml_res = getattr(cupca, attr)
         if type(cuml_res) is np.ndarray:
             cuml_res = cuml_res.to_numpy()
         skl_res = getattr(skpca, attr)
-        assert array_equal(cuml_res, skl_res, 1e-1, with_sign=True)
+        assert array_equal(cuml_res, skl_res, 1e-1, with_sign=with_sign)
 
 
 @pytest.mark.mg
diff --git a/python/cuml/tests/dask/test_dask_tsvd.py b/python/cuml/tests/dask/test_dask_tsvd.py
@@ -5,10 +5,14 @@
 import cupy as cp
 import numpy as np
 import pytest
+import sklearn
+from packaging.version import Version
 
 from cuml.dask.common.dask_arr_utils import to_dask_cudf
 from cuml.testing.utils import array_equal, stress_param, unit_param
 
+SKLEARN_GE_1_5_0 = Version(sklearn.__version__) >= Version("1.5.0")
+
 
 @pytest.mark.mg
 @pytest.mark.parametrize(
@@ -73,6 +77,10 @@ def test_tsvd_fit(data_info, input_type, client):
         skl_res = getattr(sktsvd, attr)
         if attr == "singular_values_":
             assert array_equal(cuml_res, skl_res, 1, with_sign=True)
+        elif attr == "components_":
+            assert array_equal(
+                cuml_res, skl_res, 1e-1, with_sign=SKLEARN_GE_1_5_0
+            )
         else:
             assert array_equal(cuml_res, skl_res, 1e-1, with_sign=True)