diff --git a/docs/api/scanpy_gpu.md b/docs/api/scanpy_gpu.md index a4b62405..6ebc52be 100644 --- a/docs/api/scanpy_gpu.md +++ b/docs/api/scanpy_gpu.md @@ -2,6 +2,18 @@ These functions offer accelerated near drop-in replacements for common tools provided by [`scanpy`](https://scanpy.readthedocs.io/en/stable/api/index.html) {cite}`Wolf2018`. +## Scanpy backend + +With Scanpy versions that support computational backends, RAPIDS-singlecell is available as the `rapids_singlecell` backend with the aliases `cuda`, `rapids`, `rapids-singlecell`, and `rsc`. + +```python +import scanpy as sc + +sc.settings.backend = "cuda" +``` + +The backend exposes RAPIDS-singlecell's `pp` and `tl` functions, plus {func}`rapids_singlecell.get.aggregate`, for Scanpy's backend dispatcher. + ## Preprocessing `pp` Filtering of highly-variable genes, batch-effect correction, per-cell normalization. diff --git a/docs/api/squidpy_gpu.md b/docs/api/squidpy_gpu.md index 99da5825..376765f8 100644 --- a/docs/api/squidpy_gpu.md +++ b/docs/api/squidpy_gpu.md @@ -3,6 +3,18 @@ {mod}`squidpy.gr` is a tool for the analysis of spatial molecular data {cite}`Palla2022`. {mod}`rapids_singlecell.gr` accelerates some of these functions. +## Squidpy backend + +With Squidpy versions that support computational backends, RAPIDS-singlecell is available as the `rapids_singlecell` backend with the aliases `cuda`, `rapids-singlecell`, and `rsc`. + +```python +import squidpy as sq + +sq.settings.backend = "cuda" +``` + +The backend exposes RAPIDS-singlecell's {mod}`rapids_singlecell.gr` functions for Squidpy's backend dispatcher. + ```{eval-rst} .. module:: rapids_singlecell.gr .. currentmodule:: rapids_singlecell diff --git a/docs/conf.py b/docs/conf.py index 976f6f53..50fb5910 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -79,6 +79,7 @@ "pylibraft", "dask", "cuvs", + "spatialdata", ] default_role = "literal" napoleon_google_docstring = False @@ -126,6 +127,7 @@ "statsmodels": ("https://www.statsmodels.org/stable/", None), "omnipath": ("https://omnipath.readthedocs.io/en/latest/", None), "dask": ("https://docs.dask.org/en/stable/", None), + "spatialdata": ("https://spatialdata.scverse.org/en/stable/", None), } # List of patterns, relative to source directory, that match files and diff --git a/docs/release-notes/index.md b/docs/release-notes/index.md index 5aec4361..84697708 100644 --- a/docs/release-notes/index.md +++ b/docs/release-notes/index.md @@ -3,6 +3,10 @@ # Release notes +## Version 0.16.0 +```{include} /release-notes/0.16.0.md +``` + ## Version 0.15.0 ```{include} /release-notes/0.15.0.md ``` diff --git a/pyproject.toml b/pyproject.toml index 6dea3a07..1ad9711d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,12 @@ dev = [ "pre-commit", ] +[project.entry-points."scanpy.backends"] +rapids_singlecell = "rapids_singlecell._backends.scanpy" + +[project.entry-points."squidpy.backends"] +rapids_singlecell = "rapids_singlecell._backends.squidpy" + [project.urls] Documentation = "https://rapids-singlecell.readthedocs.io" Source = "https://github.com/scverse/rapids_singlecell" diff --git a/src/rapids_singlecell/_backends/__init__.py b/src/rapids_singlecell/_backends/__init__.py new file mode 100644 index 00000000..9d48db4f --- /dev/null +++ b/src/rapids_singlecell/_backends/__init__.py @@ -0,0 +1 @@ +from __future__ import annotations diff --git a/src/rapids_singlecell/_backends/scanpy.py b/src/rapids_singlecell/_backends/scanpy.py new file mode 100644 index 00000000..60508a1a --- /dev/null +++ b/src/rapids_singlecell/_backends/scanpy.py @@ -0,0 +1,160 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from rapids_singlecell.get import aggregate +from rapids_singlecell.preprocessing import ( + bbknn, + calculate_qc_metrics, + filter_cells, + filter_genes, + filter_highly_variable, + flag_gene_family, + harmony_integrate, + highly_variable_genes, + neighbors, + normalize_pearson_residuals, + normalize_total, + regress_out, + scrublet, + scrublet_simulate_doublets, +) +from rapids_singlecell.preprocessing import log1p as _log1p +from rapids_singlecell.preprocessing import pca as _pca +from rapids_singlecell.preprocessing import scale as _scale +from rapids_singlecell.preprocessing._pca import _empty +from rapids_singlecell.tools import ( + diffmap, + draw_graph, + embedding_density, + kmeans, + leiden, + louvain, + rank_genes_groups, + rank_genes_groups_logreg, + score_genes, + score_genes_cell_cycle, + tsne, + umap, +) + +if TYPE_CHECKING: + from anndata import AnnData + from numpy.typing import DTypeLike, NDArray + +name = "rapids_singlecell" +aliases = ["cuda", "rapids", "rapids-singlecell", "rsc"] + + +def log1p( + data: AnnData, + *, + base: float | None = None, + layer: str | None = None, + obsm: str | None = None, + inplace: bool = True, + copy: bool = False, +): + return _log1p( + data, + base=base, + layer=layer, + obsm=obsm, + inplace=inplace, + copy=copy, + ) + + +def pca( + data: AnnData, + n_comps: int | None = None, + *, + layer: str | None = None, + zero_center: bool = True, + svd_solver: str | None = None, + chunked: bool = False, + chunk_size: int | None = None, + rng=None, + mask_var: NDArray | str | None = _empty, + dtype: DTypeLike = "float32", + key_added: str | None = None, + copy: bool = False, + random_state: int | None = 0, + use_highly_variable: bool | None = None, + **kwargs, +) -> None | AnnData: + if rng is not None: + random_state = rng + return _pca( + data, + n_comps=n_comps, + layer=layer, + zero_center=zero_center, + svd_solver=svd_solver, + random_state=random_state, + mask_var=mask_var, + use_highly_variable=use_highly_variable, + dtype=dtype, + chunked=chunked, + chunk_size=chunk_size, + key_added=key_added, + copy=copy, + **kwargs, + ) + + +def scale( + data: AnnData, + *, + zero_center: bool = True, + max_value: float | None = None, + copy: bool = False, + layer: str | None = None, + obsm: str | None = None, + mask_obs: NDArray | str | None = None, + inplace: bool = True, +): + return _scale( + data, + zero_center=zero_center, + max_value=max_value, + copy=copy, + layer=layer, + obsm=obsm, + mask_obs=mask_obs, + inplace=inplace, + ) + + +__all__ = [ + "aggregate", + "bbknn", + "calculate_qc_metrics", + "diffmap", + "draw_graph", + "embedding_density", + "filter_cells", + "filter_genes", + "filter_highly_variable", + "flag_gene_family", + "harmony_integrate", + "highly_variable_genes", + "kmeans", + "leiden", + "log1p", + "louvain", + "neighbors", + "normalize_pearson_residuals", + "normalize_total", + "pca", + "rank_genes_groups", + "rank_genes_groups_logreg", + "regress_out", + "scale", + "score_genes", + "score_genes_cell_cycle", + "scrublet", + "scrublet_simulate_doublets", + "tsne", + "umap", +] diff --git a/src/rapids_singlecell/_backends/squidpy.py b/src/rapids_singlecell/_backends/squidpy.py new file mode 100644 index 00000000..7afc93c5 --- /dev/null +++ b/src/rapids_singlecell/_backends/squidpy.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +from rapids_singlecell.squidpy_gpu import co_occurrence, ligrec, spatial_autocorr + +name = "rapids_singlecell" +aliases = ["rapids-singlecell", "rsc", "cuda"] + +__all__ = ["co_occurrence", "ligrec", "spatial_autocorr"] diff --git a/src/rapids_singlecell/_compat.py b/src/rapids_singlecell/_compat.py index 984ae1fc..d331bc70 100644 --- a/src/rapids_singlecell/_compat.py +++ b/src/rapids_singlecell/_compat.py @@ -7,6 +7,11 @@ from scipy.sparse import csc_matrix as csc_matrix_cpu from scipy.sparse import csr_matrix as csr_matrix_cpu +try: + from spatialdata import SpatialData +except ImportError: + SpatialData = None + def _meta_dense(dtype): return cp.zeros([0], dtype=dtype) diff --git a/src/rapids_singlecell/preprocessing/_normalize.py b/src/rapids_singlecell/preprocessing/_normalize.py index 664c5b36..4b53021e 100644 --- a/src/rapids_singlecell/preprocessing/_normalize.py +++ b/src/rapids_singlecell/preprocessing/_normalize.py @@ -357,7 +357,7 @@ def _calc_log1p(X: ArrayTypesDask, base: float | None = None) -> ArrayTypesDask: def log1p( - adata: AnnData, + data: AnnData, *, base: float | None = None, layer: str | None = None, @@ -373,7 +373,7 @@ def log1p( Parameters ---------- - adata + data AnnData object base Base of the logarithm. Natural logarithm is used by default. @@ -393,6 +393,7 @@ def log1p( in-place and returns None. """ + adata = data if copy: if not inplace: raise ValueError("`copy=True` cannot be used with `inplace=False`.") diff --git a/src/rapids_singlecell/preprocessing/_pca.py b/src/rapids_singlecell/preprocessing/_pca.py index e0b25512..83b911c1 100644 --- a/src/rapids_singlecell/preprocessing/_pca.py +++ b/src/rapids_singlecell/preprocessing/_pca.py @@ -69,7 +69,7 @@ def _resolve_mask_var( def pca( - adata: AnnData, + data: AnnData, n_comps: int | None = None, *, layer: str = None, @@ -112,7 +112,7 @@ def pca( Parameters ---------- - adata + data AnnData object n_comps @@ -210,6 +210,7 @@ def pca( Explained variance, equivalent to the eigenvalues of the \ covariance matrix. """ + adata = data if use_highly_variable is True and "highly_variable" not in adata.var.keys(): raise ValueError( "Did not find adata.var['highly_variable']. " diff --git a/src/rapids_singlecell/preprocessing/_scale.py b/src/rapids_singlecell/preprocessing/_scale.py index 0818feff..e64d192d 100644 --- a/src/rapids_singlecell/preprocessing/_scale.py +++ b/src/rapids_singlecell/preprocessing/_scale.py @@ -24,7 +24,7 @@ def scale( - adata: AnnData, + data: AnnData, *, zero_center: bool = True, max_value: float | None = None, @@ -39,7 +39,7 @@ def scale( Parameters ---------- - adata + data AnnData object zero_center @@ -74,6 +74,7 @@ def scale( depending on `inplace`. """ + adata = data if copy: if not inplace: raise ValueError("`copy=True` cannot be used with `inplace=False`.") diff --git a/src/rapids_singlecell/squidpy_gpu/_autocorr.py b/src/rapids_singlecell/squidpy_gpu/_autocorr.py index 76b2a3f3..5ea63c15 100644 --- a/src/rapids_singlecell/squidpy_gpu/_autocorr.py +++ b/src/rapids_singlecell/squidpy_gpu/_autocorr.py @@ -12,6 +12,7 @@ from scipy import sparse from statsmodels.stats.multitest import multipletests +from rapids_singlecell._compat import SpatialData from rapids_singlecell.preprocessing._utils import _sparse_to_dense from ._gearysc import _gearys_C_cupy @@ -49,7 +50,7 @@ def _to_cupy(vals, *, use_sparse: bool, dtype): def spatial_autocorr( - adata: AnnData, + adata: AnnData | SpatialData, *, connectivity_key: str = "spatial_connectivities", genes: str | Sequence[str] | None = None, @@ -118,6 +119,8 @@ def spatial_autocorr( DataFrame containing the autocorrelation scores, p-values, and corrected p-values for each gene. \ If `copy` is False, the results are stored in `adata.uns` and None is returned. """ + if SpatialData is not None and isinstance(adata, SpatialData): + adata = adata.table if genes is None: if "highly_variable" in adata.var: genes = adata[:, adata.var["highly_variable"]].var_names.values diff --git a/src/rapids_singlecell/squidpy_gpu/_co_oc.py b/src/rapids_singlecell/squidpy_gpu/_co_oc.py index e14c247e..78efb33b 100644 --- a/src/rapids_singlecell/squidpy_gpu/_co_oc.py +++ b/src/rapids_singlecell/squidpy_gpu/_co_oc.py @@ -6,6 +6,7 @@ import numpy as np from cuml.metrics import pairwise_distances +from rapids_singlecell._compat import SpatialData from rapids_singlecell._cuda import _cooc_cuda as _co from rapids_singlecell._utils import ( _calculate_blocks_per_pair, @@ -21,7 +22,7 @@ def co_occurrence( - adata: AnnData, + adata: AnnData | SpatialData, cluster_key: str, *, spatial_key: str = "spatial", @@ -65,6 +66,8 @@ def co_occurrence( computed at ``interval``. """ + if SpatialData is not None and isinstance(adata, SpatialData): + adata = adata.table _assert_categorical_obs(adata, key=cluster_key) _assert_spatial_basis(adata, key=spatial_key) spatial = cp.array(adata.obsm[spatial_key]).astype(np.float32) diff --git a/src/rapids_singlecell/squidpy_gpu/_ligrec.py b/src/rapids_singlecell/squidpy_gpu/_ligrec.py index f6cbadd2..80754e25 100644 --- a/src/rapids_singlecell/squidpy_gpu/_ligrec.py +++ b/src/rapids_singlecell/squidpy_gpu/_ligrec.py @@ -14,6 +14,8 @@ from cupyx.scipy.sparse import issparse as cpissparse from scipy.sparse import csc_matrix, issparse +from rapids_singlecell._compat import SpatialData + from ._utils import _assert_categorical_obs, _create_sparse_df SOURCE = "source" @@ -118,7 +120,7 @@ def _check_tuple_needles(needles, haystack, *, msg: str, reraise: bool = True): def ligrec( - adata: AnnData, + adata: AnnData | SpatialData, cluster_key: str, *, clusters: list | None = None, @@ -233,6 +235,8 @@ def ligrec( interacting components was 0 or it didn't pass the threshold percentage of \ cells being expressed within a given cluster. """ + if SpatialData is not None and isinstance(adata, SpatialData): + adata = adata.table # Get and Check interactions if interactions is None: interactions = _get_interactions( diff --git a/tests/test_backend_conformance.py b/tests/test_backend_conformance.py new file mode 100644 index 00000000..67bb0b33 --- /dev/null +++ b/tests/test_backend_conformance.py @@ -0,0 +1,11 @@ +"""Run squidpy's backend conformance suite against the RSC backend.""" + +from __future__ import annotations + +from squidpy.testing.backend_conformance import validate_backend + + +def test_conformance(): + results = validate_backend("rapids_singlecell") + for name, status in results.items(): + assert status == "PASSED", f"{name}: {status}" diff --git a/tests/test_scanpy_backend.py b/tests/test_scanpy_backend.py new file mode 100644 index 00000000..395c0b61 --- /dev/null +++ b/tests/test_scanpy_backend.py @@ -0,0 +1,138 @@ +from __future__ import annotations + +import inspect +import tomllib +from copy import copy +from pathlib import Path + +import numpy as np +import pytest +from anndata import AnnData + +import rapids_singlecell as rsc +from rapids_singlecell._backends import scanpy as scanpy_backend + + +def _public_functions(module): + return { + name + for name, value in vars(module).items() + if not name.startswith("_") and inspect.isfunction(value) + } + + +@pytest.mark.parametrize("module", [rsc.pp, rsc.tl]) +def test_scanpy_backend_exports_public_scanpy_api(module): + assert _public_functions(module) <= set(scanpy_backend.__all__) + + +def test_scanpy_backend_exports_aggregate(): + assert scanpy_backend.aggregate is rsc.get.aggregate + assert "aggregate" in scanpy_backend.__all__ + + +@pytest.mark.parametrize("name", ["log1p", "pca", "scale"]) +def test_scanpy_backend_data_first_signature(name): + first_param = next( + iter(inspect.signature(getattr(scanpy_backend, name)).parameters) + ) + + assert first_param == "data" + + +@pytest.mark.parametrize("name", ["log1p", "pca", "scale"]) +def test_public_pp_data_first_signature(name): + params = inspect.signature(getattr(rsc.pp, name)).parameters + first_param = next(iter(params)) + + assert first_param == "data" + assert "adata" not in params + + +def test_scanpy_backend_entrypoint_is_declared(): + pyproject = tomllib.loads(Path("pyproject.toml").read_text()) + + assert pyproject["project"]["entry-points"]["scanpy.backends"] == { + "rapids_singlecell": "rapids_singlecell._backends.scanpy" + } + + +def test_scanpy_backend_dispatch_smoke(monkeypatch): + scanpy_backends = pytest.importorskip("scanpy._backends") + import scanpy as sc + + registry = scanpy_backends.dispatcher._registry + dispatch_impl = scanpy_backends.dispatcher._dispatch_impl + old_backend = scanpy_backends.settings.backend + old_state = { + "_backends": copy(registry._backends), + "_alias_map": copy(registry._alias_map), + "_load_errors": copy(registry._load_errors), + "_registration_errors": copy(registry._registration_errors), + "_warned_untrusted": copy(registry._warned_untrusted), + "_discovered": registry._discovered, + "_sig_cache": copy(dispatch_impl._sig_cache), + } + + def fake_normalize_total( + adata: AnnData, *, target_sum: float | None = None + ) -> None: + adata.X *= target_sum + adata.uns["scanpy_backend_called"] = "normalize_total" + + def fake_scale(data: AnnData, **kwargs) -> None: + data.X *= 2 + data.uns["scanpy_scale_backend_called"] = kwargs + + def fake_pca(data: AnnData, n_comps: int | None = None, **kwargs) -> None: + data.uns["scanpy_pca_backend_called"] = { + "n_comps": n_comps, + **kwargs, + } + + monkeypatch.setattr(scanpy_backend, "normalize_total", fake_normalize_total) + monkeypatch.setattr(scanpy_backend, "_scale", fake_scale) + monkeypatch.setattr(scanpy_backend, "_pca", fake_pca) + + try: + scanpy_backends.settings._backend_var.set("cpu") + registry._backends.clear() + registry._alias_map.clear() + registry._load_errors.clear() + registry._registration_errors.clear() + registry._warned_untrusted.clear() + registry._discovered = True + registry._register_backend( + scanpy_backend, + entrypoint_name="rapids_singlecell", + distribution_name="rapids-singlecell", + object_ref="rapids_singlecell._backends.scanpy", + ) + dispatch_impl._sig_cache.clear() + dispatch_impl._update_signatures() + + adata = AnnData(np.ones((2, 2), dtype=np.float32)) + sc.pp.normalize_total(adata, target_sum=3, backend="cuda") + sc.pp.scale(adata, max_value=5, backend="cuda") + sc.pp.pca(adata, n_comps=1, backend="cuda") + + np.testing.assert_allclose(adata.X, 6) + assert adata.uns["scanpy_backend_called"] == "normalize_total" + assert adata.uns["scanpy_scale_backend_called"]["max_value"] == 5 + assert adata.uns["scanpy_pca_backend_called"]["n_comps"] == 1 + finally: + scanpy_backends.settings._backend_var.set(old_backend) + registry._backends.clear() + registry._backends.update(old_state["_backends"]) + registry._alias_map.clear() + registry._alias_map.update(old_state["_alias_map"]) + registry._load_errors.clear() + registry._load_errors.update(old_state["_load_errors"]) + registry._registration_errors.clear() + registry._registration_errors.update(old_state["_registration_errors"]) + registry._warned_untrusted.clear() + registry._warned_untrusted.update(old_state["_warned_untrusted"]) + registry._discovered = old_state["_discovered"] + dispatch_impl._sig_cache.clear() + dispatch_impl._sig_cache.update(old_state["_sig_cache"]) + dispatch_impl._update_signatures()