From b5bc7ae0791d4544af9de3c7ace84feefc4323fe Mon Sep 17 00:00:00 2001
From: Jayoung Ryu
Date: Wed, 18 Dec 2024 04:02:40 +0000
Subject: [PATCH 1/5] initial version

---
 SpatialDE/_internal/score_test.py | 99 +++++++++++++++++++++++++++++++
 SpatialDE/de_test.py              |  8 ++-
 setup.cfg                         |  2 +-
 3 files changed, 105 insertions(+), 4 deletions(-)

diff --git a/SpatialDE/_internal/score_test.py b/SpatialDE/_internal/score_test.py
index 3f1c260..0aeeff7 100644
--- a/SpatialDE/_internal/score_test.py
+++ b/SpatialDE/_internal/score_test.py
@@ -273,3 +273,102 @@ def _grad_negative_negbinom_loglik(params, counts, sizefactors):
             + (counts - mus) / one_alpha_mu
         )  # d/d_alpha
         return -tf.convert_to_tensor((grad0, grad1))
+
+
+class NormalScoreTest(ScoreTest):
+    @dataclass
+    class NullModel(ScoreTest.NullModel):
+        mu: tf.Tensor
+        sigma: tf.Tensor
+
+    def __init__(
+        self,
+        sizefactors: tf.Tensor,
+        omnibus: bool = False,
+        kernel: Optional[Union[Kernel, List[Kernel]]] = None,
+    ):
+        self.sizefactors = tf.squeeze(tf.cast(sizefactors, tf.float64))
+        if tf.rank(self.sizefactors) > 1:
+            raise ValueError("Size factors vector must have rank 1")
+
+        yidx = tf.cast(tf.squeeze(tf.where(self.sizefactors > 0)), tf.int32)
+        if tf.shape(yidx)[0] != tf.shape(self.sizefactors)[0]:
+            self.sizefactors = tf.gather(self.sizefactors, yidx)
+        else:
+            yidx = None
+        super().__init__(omnibus, kernel, yidx)
+
+    def _fit_null(self, y: tf.Tensor) -> NullModel:
+        scaledy = tf.cast(y, tf.float64) / self.sizefactors
+        res = minimize(
+            lambda *args: self._negative_normal_loglik(*args).numpy(),
+            x0=[
+                tf.math.log(tf.reduce_mean(scaledy)),
+                0.0,
+            ],
+            args=(tf.cast(y, tf.float64), self.sizefactors),
+            jac=lambda *args: self._grad_negative_normal_loglik(*args).numpy(),
+            method="bfgs",
+        )
+        mu = tf.exp(res.x[0]) * self.sizefactors
+        sigma = tf.exp(res.x[1])
+        return self.NullModel(mu, sigma)
+
+    def _test(
+        self, y: tf.Tensor, nullmodel: NullModel
+    ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
+        return self._do_test(
+            self._K,
+            to_default_float(y),
+            to_default_float(nullmodel.sigma),
+            to_default_float(nullmodel.mu),
+        )
+
+    @staticmethod
+    @tf.function(experimental_compile=True)
+    def _do_test(
+        K: tf.Tensor, rawy: tf.Tensor, sigma: tf.Tensor, mu: tf.Tensor
+    ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
+        W = 1 / sigma**2 # Delta W^-1
+        stat = 0.5 * tf.reduce_sum(
+            (rawy / mu - 1) * W * tf.tensordot(K, W * (rawy / mu - 1), axes=(-1, -1)), axis=-1
+        )
+
+        P = tf.linalg.diag(W) - W[:, tf.newaxis] * W[tf.newaxis, :] / tf.reduce_sum(W)
+        PK = W[:, tf.newaxis] * K - W[:, tf.newaxis] * ((W[tf.newaxis, :] @ K) / tf.reduce_sum(W))
+        trace_PK = tf.linalg.trace(PK)
+        e_tilde = 0.5 * trace_PK
+        I_tau_tau = 0.5 * tf.reduce_sum(PK * PK, axis=(-2, -1))
+        I_tau_theta = 0.5 * tf.reduce_sum(PK * P, axis=(-2, -1))
+        I_theta_theta = 0.5 * tf.reduce_sum(tf.square(P), axis=(-2, -1))
+        I_tau_tau_tilde = I_tau_tau - tf.square(I_tau_theta) / I_theta_theta
+
+        return stat, e_tilde, I_tau_tau_tilde
+
+    @staticmethod
+    @tf.function(experimental_compile=True)
+    def _negative_normal_loglik(params, observations, sizefactors=None):
+        mu = params[0]
+        mus = mu * sizefactors
+        log_sigma = params[1]
+        sigma = tf.exp(log_sigma)
+
+        return tf.reduce_sum(
+            0.5 * tf.math.log(2 * np.pi) +  # Constant term
+            log_sigma +  # Log of standard deviation
+            0.5 * ((observations - mus) / sigma) ** 2  # Squared standardized residuals
+        )
+
+    @staticmethod
+    @tf.function(experimental_compile=True)
+    def _grad_negative_normal_loglik(params, observations, sizefactors):
+        mu = params[0]
+        logsigma = params[1]
+        mus = mu * sizefactors
+        sigma = tf.exp(logsigma)
+
+        grad0 = tf.reduce_sum((observations - mus) / sigma**2)  # d/d_mu
+        grad1 = tf.reduce_sum(
+            1 - ((observations - mus) / sigma)**2
+        )  # d/d_logsigma
+        return -tf.convert_to_tensor((grad0, grad1))
diff --git a/SpatialDE/de_test.py b/SpatialDE/de_test.py
index 0fd9fb1..f44b63b 100644
--- a/SpatialDE/de_test.py
+++ b/SpatialDE/de_test.py
@@ -2,7 +2,7 @@
 from time import time
 import warnings
 from itertools import zip_longest
-from typing import Optional, Dict, Tuple, Union, List
+from typing import Optional, Dict, Tuple, Union, List, Literal
 
 import numpy as np
 import pandas as pd
@@ -16,6 +16,7 @@
 from ._internal.util import bh_adjust, calc_sizefactors, default_kernel_space, kspace_walk
 from ._internal.score_test import (
     NegativeBinomialScoreTest,
+    NormalScoreTest,
     combine_pvalues,
 )
 from ._internal.tf_dataset import AnnDataDataset
@@ -64,6 +65,7 @@ def test(
     sizefactors: Optional[np.ndarray] = None,
     stack_kernels: Optional[bool] = None,
     use_cache: bool = True,
+    obs_dist: Literal["NegativeBinomial", "Normal"] = "NegativeBinomial"
 ) -> Tuple[pd.DataFrame, Union[pd.DataFrame, None]]:
     """
     Test for spatially variable genes.
@@ -111,7 +113,7 @@ def test(
         sizefactors = calc_sizefactors(adata)
     if kernel_space is None:
         kernel_space = default_kernel_space(dcache)
-
+    test_func = NegativeBinomialScoreTest if obs_dist == "NegativeBinomial" else NormalScoreTest
     individual_results = None if omnibus else []
     if stack_kernels is None and adata.n_obs <= 2000 or stack_kernels or omnibus:
         kernels = []
@@ -119,7 +121,7 @@ def test(
         for k, name in kspace_walk(kernel_space, dcache):
             kernels.append(k)
             kernelnames.append(name)
-        test = NegativeBinomialScoreTest(
+        test = test_func(
             sizefactors,
             omnibus,
             kernels,
diff --git a/setup.cfg b/setup.cfg
index e53a71e..6534a7a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [metadata]
-name = SpatialDE
+name = SpatialDE2
 url = https://github.com/ilia-kats/SpatialDE
 description = Spatial and Temporal DE test
 long_description = file: README.rst

From 689af9373e3293483701ab8d82b7ee13424a93db Mon Sep 17 00:00:00 2001
From: Jayoung Ryu
Date: Wed, 18 Dec 2024 16:59:37 +0000
Subject: [PATCH 2/5] debug score test and add docstring

---
 SpatialDE/_internal/score_test.py | 6 +++---
 SpatialDE/de_test.py              | 3 ++-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/SpatialDE/_internal/score_test.py b/SpatialDE/_internal/score_test.py
index 0aeeff7..49dafe2 100644
--- a/SpatialDE/_internal/score_test.py
+++ b/SpatialDE/_internal/score_test.py
@@ -303,14 +303,14 @@ def _fit_null(self, y: tf.Tensor) -> NullModel:
         res = minimize(
             lambda *args: self._negative_normal_loglik(*args).numpy(),
             x0=[
-                tf.math.log(tf.reduce_mean(scaledy)),
+                tf.reduce_mean(scaledy),
                 0.0,
             ],
             args=(tf.cast(y, tf.float64), self.sizefactors),
             jac=lambda *args: self._grad_negative_normal_loglik(*args).numpy(),
             method="bfgs",
         )
-        mu = tf.exp(res.x[0]) * self.sizefactors
+        mu = res.x[0] * self.sizefactors
         sigma = tf.exp(res.x[1])
         return self.NullModel(mu, sigma)
 
@@ -331,7 +331,7 @@ def _test(
     def _do_test(
         K: tf.Tensor, rawy: tf.Tensor, sigma: tf.Tensor, mu: tf.Tensor
     ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
         W = 1 / sigma**2 # Delta W^-1
         stat = 0.5 * tf.reduce_sum(
-            (rawy / mu - 1) * W * tf.tensordot(K, W * (rawy / mu - 1), axes=(-1, -1)), axis=-1
+            (rawy - mu) * W * tf.tensordot(K, W * (rawy - mu), axes=(-1, -1)), axis=-1
         )
diff --git a/SpatialDE/de_test.py b/SpatialDE/de_test.py
index f44b63b..dc14d20 100644
--- a/SpatialDE/de_test.py
+++ b/SpatialDE/de_test.py
@@ -64,8 +64,8 @@ def test(
     kernel_space: Optional[Dict[str, Union[float, List[float]]]] = None,
     sizefactors: Optional[np.ndarray] = None,
     stack_kernels: Optional[bool] = None,
+    obs_dist: Literal["NegativeBinomial", "Normal"] = "NegativeBinomial",
     use_cache: bool = True,
-    obs_dist: Literal["NegativeBinomial", "Normal"] = "NegativeBinomial"
 ) -> Tuple[pd.DataFrame, Union[pd.DataFrame, None]]:
     """
     Test for spatially variable genes.
@@ -96,6 +96,7 @@ def test(
         the kernel matrices. This leads to increased memory consumption, but will drastically
         improve runtime on GPUs for smaller data sets. Defaults to ``True`` for datasets with
         less than 2000 observations and ``False`` otherwise.
+    obs_dist: Distribution of the observations. If ``"Normal"``, the observations are modeled with independent Gaussian noise and an identity link function instead of the default negative binomial model.
     use_cache: Whether to use a pre-computed distance matrix for all kernels instead of computing
         the distance matrix anew for each kernel. Increases memory consumption, but is somewhat
         faster.

From 3f7e6da4ec97e13dd7102239f4af2e1d0b3aa82d Mon Sep 17 00:00:00 2001
From: Jayoung Ryu
Date: Fri, 20 Dec 2024 14:29:31 +0000
Subject: [PATCH 3/5] simplify null model fitting

---
 SpatialDE/_internal/score_test.py | 62 +------------------------------
 SpatialDE/de_test.py              | 16 +++++---
 setup.cfg                         |  2 +-
 3 files changed, 13 insertions(+), 67 deletions(-)

diff --git a/SpatialDE/_internal/score_test.py b/SpatialDE/_internal/score_test.py
index 49dafe2..389fa24 100644
--- a/SpatialDE/_internal/score_test.py
+++ b/SpatialDE/_internal/score_test.py
@@ -281,38 +281,8 @@ class NullModel(ScoreTest.NullModel):
         mu: tf.Tensor
         sigma: tf.Tensor
 
-    def __init__(
-        self,
-        sizefactors: tf.Tensor,
-        omnibus: bool = False,
-        kernel: Optional[Union[Kernel, List[Kernel]]] = None,
-    ):
-        self.sizefactors = tf.squeeze(tf.cast(sizefactors, tf.float64))
-        if tf.rank(self.sizefactors) > 1:
-            raise ValueError("Size factors vector must have rank 1")
-
-        yidx = tf.cast(tf.squeeze(tf.where(self.sizefactors > 0)), tf.int32)
-        if tf.shape(yidx)[0] != tf.shape(self.sizefactors)[0]:
-            self.sizefactors = tf.gather(self.sizefactors, yidx)
-        else:
-            yidx = None
-        super().__init__(omnibus, kernel, yidx)
-
-    def _fit_null(self, y: tf.Tensor) -> NullModel:
-        scaledy = tf.cast(y, tf.float64) / self.sizefactors
-        res = minimize(
-            lambda *args: self._negative_normal_loglik(*args).numpy(),
-            x0=[
-                tf.reduce_mean(scaledy),
-                0.0,
-            ],
-            args=(tf.cast(y, tf.float64), self.sizefactors),
-            jac=lambda *args: self._grad_negative_normal_loglik(*args).numpy(),
-            method="bfgs",
-        )
-        mu = res.x[0] * self.sizefactors
-        sigma = tf.exp(res.x[1])
-        return self.NullModel(mu, sigma)
+    def _fit_null(self, y: tf.Tensor) -> NullModel:
+        return self.NullModel(tf.reduce_mean(y), tf.math.reduce_std(y))
 
     def _test(
         self, y: tf.Tensor, nullmodel: NullModel
@@ -329,7 +299,7 @@ def _test(
     def _do_test(
         K: tf.Tensor, rawy: tf.Tensor, sigma: tf.Tensor, mu: tf.Tensor
     ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
-        W = 1 / sigma**2 # Delta W^-1
+        W = 1 / sigma**2 # W^-1
         stat = 0.5 * tf.reduce_sum(
             (rawy - mu) * W * tf.tensordot(K, W * (rawy - mu), axes=(-1, -1)), axis=-1
         )
@@ -344,31 +314,3 @@ def _do_test(
         I_tau_tau_tilde = I_tau_tau - tf.square(I_tau_theta) / I_theta_theta
 
         return stat, e_tilde, I_tau_tau_tilde
-
-    @staticmethod
-    @tf.function(experimental_compile=True)
-    def _negative_normal_loglik(params, observations, sizefactors=None):
-        mu = params[0]
-        mus = mu * sizefactors
-        log_sigma = params[1]
-        sigma = tf.exp(log_sigma)
-
-        return tf.reduce_sum(
-            0.5 * tf.math.log(2 * np.pi) +  # Constant term
-            log_sigma +  # Log of standard deviation
-            0.5 * ((observations - mus) / sigma) ** 2  # Squared standardized residuals
-        )
-
-    @staticmethod
-    @tf.function(experimental_compile=True)
-    def _grad_negative_normal_loglik(params, observations, sizefactors):
-        mu = params[0]
-        logsigma = params[1]
-        mus = mu * sizefactors
-        sigma = tf.exp(logsigma)
-
-        grad0 = tf.reduce_sum((observations - mus) / sigma**2)  # d/d_mu
-        grad1 = tf.reduce_sum(
-            1 - ((observations - mus) / sigma)**2
-        )  # d/d_logsigma
-        return -tf.convert_to_tensor((grad0, grad1))
diff --git a/SpatialDE/de_test.py b/SpatialDE/de_test.py
index dc14d20..c3c112e 100644
--- a/SpatialDE/de_test.py
+++ b/SpatialDE/de_test.py
@@ -114,7 +114,6 @@ def test(
         sizefactors = calc_sizefactors(adata)
     if kernel_space is None:
         kernel_space = default_kernel_space(dcache)
-    test_func = NegativeBinomialScoreTest if obs_dist == "NegativeBinomial" else NormalScoreTest
     individual_results = None if omnibus else []
     if stack_kernels is None and adata.n_obs <= 2000 or stack_kernels or omnibus:
         kernels = []
@@ -122,11 +121,16 @@ def test(
         for k, name in kspace_walk(kernel_space, dcache):
             kernels.append(k)
             kernelnames.append(name)
-        test = test_func(
+        if obs_dist == "NegativeBinomial":
+            test = NegativeBinomialScoreTest(
             sizefactors,
             omnibus,
             kernels,
         )
+        else:
+            test = NormalScoreTest(
+                omnibus, kernels
+            )
 
     results = []
     with tqdm(total=adata.n_vars) as pbar:
diff --git a/setup.cfg b/setup.cfg
index 6534a7a..e53a71e 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [metadata]
-name = SpatialDE2
+name = SpatialDE
 url = https://github.com/ilia-kats/SpatialDE
 description = Spatial and Temporal DE test
 long_description = file: README.rst

From ab0dcfd9bf1232240ac4e3bdce1c4da1951f5687 Mon Sep 17 00:00:00 2001
From: Jayoung Ryu
Date: Fri, 20 Dec 2024 20:42:16 +0000
Subject: [PATCH 4/5] Ran Black formatter

---
 SpatialDE/_internal/score_test.py | 2 +-
 SpatialDE/de_test.py              | 6 ++----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/SpatialDE/_internal/score_test.py b/SpatialDE/_internal/score_test.py
index 389fa24..62f329a 100644
--- a/SpatialDE/_internal/score_test.py
+++ b/SpatialDE/_internal/score_test.py
@@ -299,7 +299,7 @@ def _test(
     def _do_test(
         K: tf.Tensor, rawy: tf.Tensor, sigma: tf.Tensor, mu: tf.Tensor
     ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
-        W = 1 / sigma**2 # W^-1
+        W = 1 / sigma**2  # W^-1
         stat = 0.5 * tf.reduce_sum(
             (rawy - mu) * W * tf.tensordot(K, W * (rawy - mu), axes=(-1, -1)), axis=-1
         )
diff --git a/SpatialDE/de_test.py b/SpatialDE/de_test.py
index c3c112e..92e5004 100644
--- a/SpatialDE/de_test.py
+++ b/SpatialDE/de_test.py
@@ -127,10 +127,8 @@ def test(
             omnibus,
             kernels,
         )
-        else:
-            test = NormalScoreTest(
-                omnibus, kernels
-            )
+        else:
+            test = NormalScoreTest(omnibus, kernels)
 
     results = []
     with tqdm(total=adata.n_vars) as pbar:

From f7b7f7d4f5ae1595d923c23df22822eb6ce78c35 Mon Sep 17 00:00:00 2001
From: Jayoung Ryu
Date: Fri, 20 Dec 2024 20:44:44 +0000
Subject: [PATCH 5/5] store variance instead of std

---
 SpatialDE/_internal/score_test.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/SpatialDE/_internal/score_test.py b/SpatialDE/_internal/score_test.py
index 62f329a..a02d84c 100644
--- a/SpatialDE/_internal/score_test.py
+++ b/SpatialDE/_internal/score_test.py
@@ -279,10 +279,10 @@ class NormalScoreTest(ScoreTest):
     @dataclass
     class NullModel(ScoreTest.NullModel):
         mu: tf.Tensor
-        sigma: tf.Tensor
+        sigmasq: tf.Tensor
 
     def _fit_null(self, y: tf.Tensor) -> NullModel:
-        return self.NullModel(tf.reduce_mean(y), tf.math.reduce_std(y))
+        return self.NullModel(tf.reduce_mean(y), tf.math.reduce_variance(y))
 
     def _test(
         self, y: tf.Tensor, nullmodel: NullModel
@@ -290,16 +290,16 @@ def _test(
         return self._do_test(
             self._K,
             to_default_float(y),
-            to_default_float(nullmodel.sigma),
+            to_default_float(nullmodel.sigmasq),
             to_default_float(nullmodel.mu),
         )
 
     @staticmethod
     @tf.function(experimental_compile=True)
     def _do_test(
-        K: tf.Tensor, rawy: tf.Tensor, sigma: tf.Tensor, mu: tf.Tensor
+        K: tf.Tensor, rawy: tf.Tensor, sigmasq: tf.Tensor, mu: tf.Tensor
     ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
-        W = 1 / sigma**2  # W^-1
+        W = 1 / sigmasq  # W^-1
         stat = 0.5 * tf.reduce_sum(
             (rawy - mu) * W * tf.tensordot(K, W * (rawy - mu), axes=(-1, -1)), axis=-1
         )
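
For reference, the closed-form quantities that `_do_test` computes for the Gaussian model are the variance-component score statistic, its null mean (`e_tilde`), and the efficient information (`I_tau_tau_tilde`). The following NumPy sketch re-expresses the TensorFlow code above for spot-checking on small inputs; the function name, the squared-exponential kernel, and the random-data harness are illustrative and not part of the patch:

import numpy as np

def normal_score_test_terms(K, y, mu, sigmasq):
    # Null model: y ~ N(mu, sigmasq * I); the tested alternative adds a
    # variance component tau * K. Mirrors NormalScoreTest._do_test after
    # this series.
    n = y.shape[0]
    W = np.full(n, 1.0 / sigmasq)                 # diagonal of W = (sigmasq * I)^-1
    r = y - mu                                    # residuals under the null
    stat = 0.5 * np.sum(r * W * (K @ (W * r)))    # score statistic
    # P adjusts the precision for the estimated mean: P = W - W 1 1^T W / (1^T W 1)
    P = np.diag(W) - np.outer(W, W) / W.sum()
    PK = P @ K
    e_tilde = 0.5 * np.trace(PK)                  # null mean of the statistic
    I_tau_tau = 0.5 * np.sum(PK * PK)
    I_tau_theta = 0.5 * np.sum(PK * P)
    I_theta_theta = 0.5 * np.sum(P * P)
    # Information for tau after profiling out the noise-variance parameter
    I_tau_tau_tilde = I_tau_tau - I_tau_theta**2 / I_theta_theta
    return stat, e_tilde, I_tau_tau_tilde

# Illustrative spot check on random data with a squared-exponential kernel:
rng = np.random.default_rng(0)
X = rng.uniform(size=(50, 2))
sqdist = ((X[:, None, :] - X[None, :, :]) ** 2).sum(-1)
K = np.exp(-sqdist / 0.1)
y = rng.normal(size=50)
print(normal_score_test_terms(K, y, y.mean(), y.var()))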
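
A sketch of how the new obs_dist switch would be called through the public test function, assuming test is re-exported at the package level as in the existing SpatialDE2 interface; the input file and its contents are hypothetical:

import scanpy as sc
import SpatialDE

adata = sc.read_h5ad("spatial_counts.h5ad")  # hypothetical AnnData with spatial coordinates

# Default behaviour: negative binomial score test on raw counts.
results_nb, _ = SpatialDE.test(adata)

# Added by this series: Gaussian score test, e.g. for variance-stabilized or
# otherwise normalized expression values.
results_normal, _ = SpatialDE.test(adata, obs_dist="Normal")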