From 44ea179017d2249d528755fd91a2fc1e1321860c Mon Sep 17 00:00:00 2001 From: Nicola Demo Date: Fri, 9 Jan 2026 10:52:42 +0100 Subject: [PATCH] sklearn wrapper for easy integration --- ezyrb/__init__.py | 9 +- ezyrb/approximation/__init__.py | 4 +- ezyrb/approximation/sklearn_approximation.py | 124 ++++++++++++ ezyrb/reduction/__init__.py | 4 +- ezyrb/reduction/sklearn_reduction.py | 202 +++++++++++++++++++ tests/test_sklearn_wrappers.py | 146 ++++++++++++++ 6 files changed, 483 insertions(+), 6 deletions(-) create mode 100644 ezyrb/approximation/sklearn_approximation.py create mode 100644 ezyrb/reduction/sklearn_reduction.py create mode 100644 tests/test_sklearn_wrappers.py diff --git a/ezyrb/__init__.py b/ezyrb/__init__.py index 4c93959..e8ac950 100644 --- a/ezyrb/__init__.py +++ b/ezyrb/__init__.py @@ -1,10 +1,11 @@ """EZyRB package""" __all__ = [ - 'Database', 'Snapshot', 'Reduction', 'POD', 'Approximation', 'RBF', 'Linear', 'GPR', - 'ANN', 'KNeighborsRegressor', 'RadiusNeighborsRegressor', 'AE', - 'ReducedOrderModel', 'PODAE', 'RegularGrid', - 'MultiReducedOrderModel' + 'Database', 'Snapshot', 'Reduction', 'POD', 'Approximation', 'RBF', + 'Linear', 'GPR', 'ANN', 'KNeighborsRegressor', + 'RadiusNeighborsRegressor', 'AE', 'ReducedOrderModel', 'PODAE', + 'RegularGrid', 'MultiReducedOrderModel', 'SklearnApproximation', + 'SklearnReduction' ] from .database import Database diff --git a/ezyrb/approximation/__init__.py b/ezyrb/approximation/__init__.py index f9db613..15aa07c 100644 --- a/ezyrb/approximation/__init__.py +++ b/ezyrb/approximation/__init__.py @@ -2,7 +2,8 @@ __all__ = [ 'Approximation', 'RBF', 'Linear', 'GPR', - 'ANN', 'KNeighborsRegressor', 'RadiusNeighborsRegressor' + 'ANN', 'KNeighborsRegressor', 'RadiusNeighborsRegressor', + 'SklearnApproximation' ] from .approximation import Approximation @@ -12,3 +13,4 @@ from .ann import ANN from .kneighbors_regressor import KNeighborsRegressor from .radius_neighbors_regressor import RadiusNeighborsRegressor +from 
"""
Wrapper for using any scikit-learn regressor in EZyRB.
"""

import logging

from .approximation import Approximation

logger = logging.getLogger(__name__)


class SklearnApproximation(Approximation):
    """
    Wrapper class to use any scikit-learn regressor as an approximation
    method in EZyRB.

    Any scikit-learn estimator that implements the fit/predict interface
    (regressors, etc.) can be used within the EZyRB framework through this
    class.

    :param sklearn_model: An instance of a scikit-learn estimator (e.g.,
        RandomForestRegressor, SVR, KNeighborsRegressor, etc.). The model
        should implement fit() and predict() methods.
    :param dict fit_params: Optional parameters to pass to the fit() method.
        Default is None.

    :Example:
        >>> from ezyrb import SklearnApproximation
        >>> from sklearn.ensemble import RandomForestRegressor
        >>> model = RandomForestRegressor(n_estimators=100, random_state=42)
        >>> approximation = SklearnApproximation(model)
        >>> approximation.fit(points, values)
        >>> predictions = approximation.predict(new_points)

    :Example:
        >>> from ezyrb import SklearnApproximation
        >>> from sklearn.svm import SVR
        >>> from sklearn.multioutput import MultiOutputRegressor
        >>> base_model = SVR(kernel='rbf', C=1.0)
        >>> model = MultiOutputRegressor(base_model)
        >>> approximation = SklearnApproximation(model)
        >>> approximation.fit(points, values)
    """

    def __init__(self, sklearn_model, fit_params=None):
        """
        Initialize the SklearnApproximation wrapper.

        :param sklearn_model: A scikit-learn estimator instance
        :param dict fit_params: Optional fit parameters
        :raises ValueError: if the model lacks a fit() or predict() method.
        """
        logger.debug(
            "Initializing SklearnApproximation with model: %s",
            type(sklearn_model).__name__
        )

        # Fail fast on objects that do not expose the estimator interface.
        if not hasattr(sklearn_model, 'fit'):
            raise ValueError(
                "sklearn_model must have a 'fit' method"
            )
        if not hasattr(sklearn_model, 'predict'):
            raise ValueError(
                "sklearn_model must have a 'predict' method"
            )

        self.model = sklearn_model
        self.fit_params = fit_params if fit_params is not None else {}
        # Guards predict(): sklearn models raise confusing errors if
        # predict() is called before fit(), so we track the state here.
        self._fitted = False

    def fit(self, points, values):
        """
        Fit the scikit-learn model.

        :param numpy.ndarray points: The input points (training data)
        :param numpy.ndarray values: The output values (targets)
        """
        # scikit-learn expects 2D (n_samples, n_features) arrays: promote
        # 1D inputs to single-feature columns BEFORE logging, so the logs
        # describe the data the model actually receives.
        if points.ndim == 1:
            points = points.reshape(-1, 1)
        if values.ndim == 1:
            values = values.reshape(-1, 1)

        logger.info(
            "Fitting %s with %d samples",
            type(self.model).__name__,
            points.shape[0]
        )
        logger.debug(
            "Input shape: %s, Output shape: %s",
            points.shape,
            values.shape
        )

        self.model.fit(points, values, **self.fit_params)
        self._fitted = True

        logger.debug("Model fitting completed")

    def predict(self, new_points):
        """
        Predict using the fitted scikit-learn model.

        :param numpy.ndarray new_points: The input points for prediction
        :return: The predicted values
        :rtype: numpy.ndarray
        :raises RuntimeError: if the model has not been fitted yet.
        """
        if not self._fitted:
            raise RuntimeError(
                "Model must be fitted before calling predict()"
            )

        # Ensure 2D array (same normalization as fit()).
        if new_points.ndim == 1:
            new_points = new_points.reshape(-1, 1)

        # After the reshape the first axis is always the sample axis, so
        # shape[0] is the correct count. (Logging before the reshape
        # reported "1" for any 1D input, under-counting k single-feature
        # points.)
        logger.debug(
            "Predicting for %d new points",
            new_points.shape[0]
        )

        predictions = self.model.predict(new_points)

        logger.debug("Prediction completed, output shape: %s",
                     predictions.shape)

        return predictions
"""
Wrapper for using any scikit-learn dimensionality reduction method in EZyRB.
"""

import logging

from .reduction import Reduction

logger = logging.getLogger(__name__)


class SklearnReduction(Reduction):
    """
    Wrapper class to use any scikit-learn dimensionality reduction method
    in EZyRB.

    Any scikit-learn transformer that implements the fit/transform/
    inverse_transform interface (PCA, KernelPCA, NMF, etc.) can be used
    within the EZyRB framework through this class.

    :param sklearn_model: An instance of a scikit-learn transformer (e.g.,
        PCA, KernelPCA, FastICA, NMF, TruncatedSVD, etc.). The model should
        implement fit(), transform(), and optionally inverse_transform()
        methods.
    :param dict fit_params: Optional parameters to pass to the fit() method.
        Default is None.

    :Example:
        >>> from ezyrb import SklearnReduction
        >>> from sklearn.decomposition import PCA
        >>> model = PCA(n_components=5)
        >>> reduction = SklearnReduction(model)
        >>> reduction.fit(snapshots)
        >>> reduced = reduction.transform(snapshots)
        >>> reconstructed = reduction.inverse_transform(reduced)

    :Example:
        >>> from ezyrb import SklearnReduction
        >>> from sklearn.decomposition import KernelPCA
        >>> model = KernelPCA(n_components=10, kernel='rbf')
        >>> reduction = SklearnReduction(model)
        >>> reduction.fit(snapshots)
    """

    def __init__(self, sklearn_model, fit_params=None):
        """
        Initialize the SklearnReduction wrapper.

        :param sklearn_model: A scikit-learn transformer instance
        :param dict fit_params: Optional fit parameters
        :raises ValueError: if the model lacks fit() or transform().
        """
        logger.debug(
            "Initializing SklearnReduction with model: %s",
            type(sklearn_model).__name__
        )

        # Fail fast on objects that do not expose the transformer interface.
        if not hasattr(sklearn_model, 'fit'):
            raise ValueError(
                "sklearn_model must have a 'fit' method"
            )
        if not hasattr(sklearn_model, 'transform'):
            raise ValueError(
                "sklearn_model must have a 'transform' method"
            )

        self.model = sklearn_model
        self.fit_params = fit_params if fit_params is not None else {}
        self._fitted = False
        # inverse_transform is optional in sklearn (e.g. some KernelPCA
        # configurations lack it); remember whether it is available so
        # inverse_transform() can raise a clear error instead.
        self._has_inverse = hasattr(sklearn_model, 'inverse_transform')

        if not self._has_inverse:
            logger.warning(
                "%s does not have inverse_transform method. "
                "inverse_transform() will raise an error.",
                type(sklearn_model).__name__
            )

    def fit(self, values):
        """
        Fit the scikit-learn dimensionality reduction model.

        :param numpy.ndarray values: The snapshots matrix (stored by column)
        """
        # Snapshots are stored by column, so the snapshot count is
        # shape[1], not shape[0] (which is the feature count).
        logger.info(
            "Fitting %s with %d snapshots",
            type(self.model).__name__,
            values.shape[1]
        )
        logger.debug("Input shape: %s", values.shape)

        # scikit-learn expects (n_samples, n_features)
        # EZyRB stores snapshots by column, so we transpose
        values_T = values.T

        self.model.fit(values_T, **self.fit_params)
        self._fitted = True

        logger.debug("Model fitting completed")

        # Log explained variance if available (e.g., PCA)
        if hasattr(self.model, 'explained_variance_ratio_'):
            total_var = self.model.explained_variance_ratio_.sum()
            logger.info(
                "Explained variance ratio: %.4f",
                total_var
            )

    def transform(self, values):
        """
        Reduce the dimensionality of the given snapshots.

        :param numpy.ndarray values: The snapshots matrix (stored by column)
        :return: The reduced representation (stored by column)
        :rtype: numpy.ndarray
        :raises RuntimeError: if the model has not been fitted yet.
        """
        if not self._fitted:
            raise RuntimeError(
                "Model must be fitted before calling transform()"
            )

        # Column storage: the number of snapshots is shape[1].
        logger.debug(
            "Transforming %d snapshots",
            values.shape[1]
        )

        # Transpose for scikit-learn
        values_T = values.T
        reduced_T = self.model.transform(values_T)

        # Transpose back to EZyRB format
        reduced = reduced_T.T

        logger.debug(
            "Transformation completed, output shape: %s",
            reduced.shape
        )

        return reduced

    def inverse_transform(self, reduced_values):
        """
        Reconstruct the snapshots from reduced representation.

        :param numpy.ndarray reduced_values: The reduced representation
            (stored by column)
        :return: The reconstructed snapshots (stored by column)
        :rtype: numpy.ndarray
        :raises RuntimeError: if the model has not been fitted yet.
        :raises NotImplementedError: if the wrapped model has no
            inverse_transform() method.
        """
        if not self._fitted:
            raise RuntimeError(
                "Model must be fitted before calling inverse_transform()"
            )

        if not self._has_inverse:
            raise NotImplementedError(
                f"{type(self.model).__name__} does not implement "
                "inverse_transform()"
            )

        # Column storage: the number of reduced vectors is shape[1].
        logger.debug(
            "Inverse transforming %d reduced vectors",
            reduced_values.shape[1]
        )

        # Transpose for scikit-learn
        reduced_T = reduced_values.T
        reconstructed_T = self.model.inverse_transform(reduced_T)

        # Transpose back to EZyRB format
        reconstructed = reconstructed_T.T

        logger.debug(
            "Inverse transformation completed, output shape: %s",
            reconstructed.shape
        )

        return reconstructed

    def reduce(self, X):
        """
        Alias for transform(). Kept for backward compatibility.

        :param numpy.ndarray X: The snapshots matrix
        :return: The reduced representation
        :rtype: numpy.ndarray
        """
        return self.transform(X)

    def expand(self, reduced):
        """
        Alias for inverse_transform(). Kept for backward compatibility.

        :param numpy.ndarray reduced: The reduced representation
        :return: The reconstructed snapshots
        :rtype: numpy.ndarray
        """
        return self.inverse_transform(reduced)
"""
Test SklearnApproximation and SklearnReduction wrappers
"""

import numpy as np
from unittest import TestCase
from ezyrb import SklearnApproximation, SklearnReduction


class TestSklearnWrappers(TestCase):
    def test_sklearn_approximation_import(self):
        """Test that SklearnApproximation can be imported"""
        from ezyrb import SklearnApproximation
        assert SklearnApproximation is not None

    def test_sklearn_reduction_import(self):
        """Test that SklearnReduction can be imported"""
        from ezyrb import SklearnReduction
        assert SklearnReduction is not None

    def test_sklearn_approximation_random_forest(self):
        """Test SklearnApproximation with RandomForestRegressor"""
        from sklearn.ensemble import RandomForestRegressor

        model = RandomForestRegressor(n_estimators=10, random_state=42)
        approximation = SklearnApproximation(model)

        # Dummy regression data: 20 samples, 3 inputs, 2 outputs.
        X = np.random.rand(20, 3)
        y = np.random.rand(20, 2)

        approximation.fit(X, y)
        predictions = approximation.predict(X[:5])

        assert predictions.shape == (5, 2)

    def test_sklearn_approximation_svr(self):
        """Test SklearnApproximation with SVR"""
        from sklearn.svm import SVR
        from sklearn.multioutput import MultiOutputRegressor

        base_model = SVR(kernel='rbf')
        model = MultiOutputRegressor(base_model)
        approximation = SklearnApproximation(model)

        # Dummy regression data: 30 samples, 2 inputs, 3 outputs.
        X = np.random.rand(30, 2)
        y = np.random.rand(30, 3)

        approximation.fit(X, y)
        predictions = approximation.predict(X[:10])

        assert predictions.shape == (10, 3)

    def test_sklearn_reduction_pca(self):
        """Test SklearnReduction with PCA"""
        from sklearn.decomposition import PCA

        model = PCA(n_components=5)
        reduction = SklearnReduction(model)

        # Snapshots are stored BY COLUMN: a (10, 50) matrix is 50
        # snapshots of 10 features each. (The original comment had the
        # axes swapped.)
        snapshots = np.random.rand(10, 50)

        reduction.fit(snapshots)
        reduced = reduction.transform(snapshots)

        # 5 modes (rows) x 50 snapshots (columns).
        assert reduced.shape == (5, 50)

        # Test inverse transform
        reconstructed = reduction.inverse_transform(reduced)
        assert reconstructed.shape == snapshots.shape

    def test_sklearn_reduction_kernel_pca(self):
        """Test SklearnReduction with KernelPCA"""
        from sklearn.decomposition import KernelPCA

        model = KernelPCA(n_components=3, kernel='rbf')
        reduction = SklearnReduction(model)

        # Column storage: (8, 40) is 40 snapshots of 8 features each.
        snapshots = np.random.rand(8, 40)

        reduction.fit(snapshots)
        reduced = reduction.transform(snapshots)

        # 3 modes x 40 snapshots.
        assert reduced.shape == (3, 40)

    def test_sklearn_approximation_invalid_model(self):
        """Test that invalid model raises ValueError"""
        class DummyModel:
            pass

        with self.assertRaises(ValueError):
            SklearnApproximation(DummyModel())

    def test_sklearn_reduction_invalid_model(self):
        """Test that invalid model raises ValueError"""
        class DummyModel:
            pass

        with self.assertRaises(ValueError):
            SklearnReduction(DummyModel())

    def test_sklearn_approximation_predict_before_fit(self):
        """Test that predict before fit raises RuntimeError"""
        from sklearn.linear_model import LinearRegression

        model = LinearRegression()
        approximation = SklearnApproximation(model)

        X = np.random.rand(10, 2)

        with self.assertRaises(RuntimeError):
            approximation.predict(X)

    def test_sklearn_reduction_transform_before_fit(self):
        """Test that transform before fit raises RuntimeError"""
        from sklearn.decomposition import PCA

        model = PCA(n_components=2)
        reduction = SklearnReduction(model)

        snapshots = np.random.rand(5, 20)

        with self.assertRaises(RuntimeError):
            reduction.transform(snapshots)

    def test_sklearn_reduction_reduce_expand_aliases(self):
        """Test that reduce/expand aliases work"""
        from sklearn.decomposition import PCA

        model = PCA(n_components=3)
        reduction = SklearnReduction(model)

        # Column storage: (7, 25) is 25 snapshots of 7 features each.
        snapshots = np.random.rand(7, 25)

        reduction.fit(snapshots)

        # Test reduce (alias for transform)
        reduced = reduction.reduce(snapshots)
        assert reduced.shape == (3, 25)

        # Test expand (alias for inverse_transform)
        expanded = reduction.expand(reduced)
        assert expanded.shape == snapshots.shape