Copy of "ClassifierDrift bugfix #658", rebased for patch release (#662)

Ashley Scillitoe · arnaudvl · web-flow · commit c4ed24e7e9e0 · 2022-10-21T14:18:49.000+01:00
* Fix issue with returned probs_sort when train_size is used

* Update CHANGELOG.md

Co-authored-by: Arnaud Van Looveren <avl@seldon.io>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,7 @@
 
 ### Fixed
 - Fixed an incorrect default value for the `alternative` kwarg in the `FETDrift` detector ([#661](https://github.com/SeldonIO/alibi-detect/pull/661)).
+- Fixed an issue with `ClassifierDrift` returning incorrect prediction probabilities when `train_size` is given ([#662](https://github.com/SeldonIO/alibi-detect/pull/662)).
 
 ## v0.10.3
 ## [v0.10.3](https://github.com/SeldonIO/alibi-detect/tree/v0.10.3) (2022-08-17)
diff --git a/alibi_detect/cd/base.py b/alibi_detect/cd/base.py
@@ -153,14 +153,13 @@ def preprocess(self, x: Union[np.ndarray, list]) -> Tuple[Union[np.ndarray, list
         else:
             return self.x_ref, x  # type: ignore[return-value]
 
-    def get_splits(self,
-                   x_ref: Union[np.ndarray, list],
-                   x: Union[np.ndarray, list],
-                   return_splits: bool = True
-                   ) -> Union[
-                        Tuple[Union[np.ndarray, list], np.ndarray],
-                        Tuple[Union[np.ndarray, list], np.ndarray, Optional[List[Tuple[np.ndarray, np.ndarray]]]]
-                    ]:
+    def get_splits(
+            self,
+            x_ref: Union[np.ndarray, list],
+            x: Union[np.ndarray, list],
+            return_splits: bool = True
+    ) -> Union[Tuple[Union[np.ndarray, list], np.ndarray],
+               Tuple[Union[np.ndarray, list], np.ndarray, Optional[List[Tuple[np.ndarray, np.ndarray]]]]]:
         """
         Split reference and test data in train and test folds used by the classifier.
 
diff --git a/alibi_detect/cd/pytorch/classifier.py b/alibi_detect/cd/pytorch/classifier.py
@@ -173,7 +173,6 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray, n
         and the out-of-fold classifier model prediction probabilities on the reference and test data
         """
         x_ref, x = self.preprocess(x)
-        n_ref, n_cur = len(x_ref), len(x)
         x, y, splits = self.get_splits(x_ref, x)  # type: ignore
 
         # iterate over folds: train a new model for each fold and make out-of-fold (oof) predictions
@@ -199,6 +198,8 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray, n
         probs_oof = softmax(preds_oof, axis=-1) if self.preds_type == 'logits' else preds_oof
         idx_oof = np.concatenate(idx_oof_list, axis=0)
         y_oof = y[idx_oof]
+        n_cur = y_oof.sum()
+        n_ref = len(y_oof) - n_cur
         p_val, dist = self.test_probs(y_oof, probs_oof, n_ref, n_cur)
         probs_sort = probs_oof[np.argsort(idx_oof)]
         return p_val, dist, probs_sort[:n_ref, 1], probs_sort[n_ref:, 1]
diff --git a/alibi_detect/cd/sklearn/classifier.py b/alibi_detect/cd/sklearn/classifier.py
@@ -251,29 +251,27 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray, n
 
     def _score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray, np.ndarray]:
         x_ref, x = self.preprocess(x)
-        n_ref, n_cur = len(x_ref), len(x)
         x, y, splits = self.get_splits(x_ref, x, return_splits=True)  # type: ignore
 
         # iterate over folds: train a new model for each fold and make out-of-fold (oof) predictions
         probs_oof_list, idx_oof_list = [], []
         for idx_tr, idx_te in splits:
             y_tr = y[idx_tr]
-
             if isinstance(x, np.ndarray):
                 x_tr, x_te = x[idx_tr], x[idx_te]
             elif isinstance(x, list):
                 x_tr, x_te = [x[_] for _ in idx_tr], [x[_] for _ in idx_te]
             else:
                 raise TypeError(f'x needs to be of type np.ndarray or list and not {type(x)}.')
-
             self.model.fit(x_tr, y_tr)
             probs = self.model.aux_predict_proba(x_te)
             probs_oof_list.append(probs)
             idx_oof_list.append(idx_te)
-
         probs_oof = np.concatenate(probs_oof_list, axis=0)
         idx_oof = np.concatenate(idx_oof_list, axis=0)
         y_oof = y[idx_oof]
+        n_cur = y_oof.sum()
+        n_ref = len(y_oof) - n_cur
         p_val, dist = self.test_probs(y_oof, probs_oof, n_ref, n_cur)
         probs_sort = probs_oof[np.argsort(idx_oof)]
         return p_val, dist, probs_sort[:n_ref, 1], probs_sort[n_ref:, 1]
diff --git a/alibi_detect/cd/tensorflow/classifier.py b/alibi_detect/cd/tensorflow/classifier.py
@@ -160,7 +160,6 @@ def score(self, x: np.ndarray) -> Tuple[float, float, np.ndarray, np.ndarray]:
         and the out-of-fold classifier model prediction probabilities on the reference and test data
         """
         x_ref, x = self.preprocess(x)  # type: ignore[assignment]
-        n_ref, n_cur = len(x_ref), len(x)
         x, y, splits = self.get_splits(x_ref, x)  # type: ignore
 
         # iterate over folds: train a new model for each fold and make out-of-fold (oof) predictions
@@ -186,6 +185,8 @@ def score(self, x: np.ndarray) -> Tuple[float, float, np.ndarray, np.ndarray]:
         probs_oof = softmax(preds_oof, axis=-1) if self.preds_type == 'logits' else preds_oof
         idx_oof = np.concatenate(idx_oof_list, axis=0)
         y_oof = y[idx_oof]
+        n_cur = y_oof.sum()
+        n_ref = len(y_oof) - n_cur
         p_val, dist = self.test_probs(y_oof, probs_oof, n_ref, n_cur)
         probs_sort = probs_oof[np.argsort(idx_oof)]
         return p_val, dist, probs_sort[:n_ref, 1], probs_sort[n_ref:, 1]