diff --git a/DESCRIPTION b/DESCRIPTION index 3dd5b8d..43d40c2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: scone -Version: 1.31.0 +Version: 1.31.1 Title: Single Cell Overview of Normalized Expression data Description: SCONE is an R package for comparing and ranking the performance of different normalization schemes for single-cell RNA-seq and other @@ -44,7 +44,8 @@ Imports: MatrixGenerics, SingleCellExperiment, DelayedMatrixStats, - sparseMatrixStats + sparseMatrixStats, + SparseArray (>= 1.7.6) Suggests: BiocStyle, DT, @@ -59,6 +60,7 @@ Suggests: scRNAseq, shiny, testthat, + DelayedArray, visNetwork, doParallel, batchtools, diff --git a/NAMESPACE b/NAMESPACE index 227e49a..e7bd09a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,6 +45,7 @@ import(SingleCellExperiment) import(SummarizedExperiment) import(gplots) import(methods) +importClassesFrom(SparseArray,SparseMatrix) importClassesFrom(SummarizedExperiment,SummarizedExperiment) importFrom(DelayedMatrixStats,colMins) importFrom(DelayedMatrixStats,colSums2) @@ -52,6 +53,7 @@ importFrom(MatrixGenerics,colMins) importFrom(MatrixGenerics,colSums2) importFrom(RColorBrewer,brewer.pal) importFrom(RUVSeq,RUVg) +importFrom(SparseArray,nzwhich) importFrom(aroma.light,normalizeQuantileRank.matrix) importFrom(boot,inv.logit) importFrom(boot,logit) diff --git a/R/SCONE_DEFAULTS.R b/R/SCONE_DEFAULTS.R index 56dfbfd..8f28848 100644 --- a/R/SCONE_DEFAULTS.R +++ b/R/SCONE_DEFAULTS.R @@ -154,7 +154,8 @@ SCRAN_FN = function(ei){ #' eo <- PSINORM_FN(ei) #' PSINORM_FN = function(ei){ - inv_sf <- pareto.MLE(ei+1) - eo = t(t(ei) / mean(inv_sf) * inv_sf) + tei <- t(ei) + sf <- compute_transposed_PsiNorm_sf(tei) + eo <- t(mat_v_div(tei, mean(1 / sf) * sf)) return(eo) } diff --git a/R/pareto_norm.R b/R/pareto_norm.R index ef46e9c..6d8da57 100644 --- a/R/pareto_norm.R +++ b/R/pareto_norm.R @@ -54,22 +54,79 @@ setMethod( #' @export #' @importFrom DelayedMatrixStats colSums2 colMins #' @importFrom sparseMatrixStats colSums2 colMins +#' @importClassesFrom SparseArray SparseMatrix +#' @importFrom SparseArray nzwhich setMethod( f = "PsiNorm", signature = signature(x = "ANY"), definition = function(x){ - inv_sf <- pareto.MLE(x+1) - t(t(x) * inv_sf) + ## Uses only two transpostions (instead of four in original implementation). + ## Preserves sparsity. + tx <- t(x) + sf <- compute_transposed_PsiNorm_sf(tx) + t(mat_v_div(tx, sf)) }) -# can we simplify the function and assume min is always 1? -pareto.MLE <- function(sce) { - n <- nrow(sce) - m <- MatrixGenerics::colMins(sce) - a <- n/MatrixGenerics::colSums2(t(t(log(sce)) - log(m))) - return(a) +## Preserves sparsity. +compute_transposed_PsiNorm_sf <- function(tx) { + ## Temporary workaround until operations from the Math group (e.g. log1p) + ## are supported on a SparseMatrix object of type "integer". + ## See https://github.com/Bioconductor/SparseArray/blob/devel/TODO + if (type(tx) != "double") + type(tx) <- "double" + x2 <- log1p(tx) + m2 <- MatrixGenerics::rowMins(x2) + x2 <- mat_v_sub(x2, m2) + MatrixGenerics::rowSums2(x2) / ncol(tx) } +## Preserves sparsity. computePsiNormSF <- function(x) { - 1/pareto.MLE(x+1) + compute_transposed_PsiNorm_sf(t(x)) } + +.build_linear_index <- function(nr, nc, i) { + stopifnot(is.integer(nr), length(nr) == 1L, + is.integer(nc), length(nc) == 1L, is.integer(i)) + rep.int(nr * (seq_len(nc) - 1L), rep.int(length(i), nc)) + i +} + +## Implements optimized substraction between a matrix-like object 'mat' and +## an ordinary vector 'v' with mostly zeros that accepts a SparseMatrix +## object (in which case it also returns a SparseMatrix object). Note that +## the SparseArray package does not support this operation out-of-the-box at +## the moment. +mat_v_sub <- function(mat, v) { + mat_dim <- dim(mat) + stopifnot(length(mat_dim) == 2L, is.vector(v), length(v) == mat_dim[[1L]]) + nzidx <- nzwhich(v) # indices of nonzero values in 'v' + if (length(nzidx) == 0L) + return(mat) # no-op + if (!is(mat, "SparseMatrix")) + return(mat - v) + ## Works well if 'nzidx' is short (i.e. 'v' contains mostly zeros). + Lidx <- .build_linear_index(mat_dim[[1L]], mat_dim[[2L]], nzidx) + mat[Lidx] <- mat[Lidx] - v[nzidx] + mat +} + +## Implements division between a matrix-like object 'mat' and an ordinary +## vector 'v' with mostly nonzero/non-NA values that accepts a SparseMatrix +## object (in which case it also returns a SparseMatrix object). Note that +## the SparseArray package only supports this operation when 'v' contains +## no zeros or NAs at the moment. +mat_v_div <- function(mat, v) { + mat_dim <- dim(mat) + stopifnot(length(mat_dim) == 2L, is.vector(v), length(v) == mat_dim[[1L]]) + idx <- which(is.na(v) | v == 0) + if (length(idx) == 0L || !is(mat, "SparseMatrix")) + return(mat / v) + ## Works well if 'idx' is short (i.e. if 'v' contains very few zeros/NAs). + v2 <- v + v2[idx] <- 1L + mat <- mat / v2 + Lidx <- .build_linear_index(mat_dim[[1L]], mat_dim[[2L]], idx) + mat[Lidx] <- mat[Lidx] / v[idx] + mat +} + diff --git a/tests/testthat/test-pareto_norm.R b/tests/testthat/test-pareto_norm.R index 055fc67..27ca00b 100644 --- a/tests/testthat/test-pareto_norm.R +++ b/tests/testthat/test-pareto_norm.R @@ -1,14 +1,33 @@ test_that("PSiNorm works with all input classes", { - m <- matrix(c(1,0,2,0,2,9,3,0), ncol=2) - + m <- matrix(0L, nrow=4, ncol=5) + m[c(1,3) , 1] <- 1:2 + m[1:3, 3] <- c(2L, 9L, 3L) + m[ , 4] <- 2:1 + m[2, 5] <- 1L + out1 <- PsiNorm(m) - + + svt <- as(m, "SVT_SparseMatrix") + out_svt <- PsiNorm(svt) + expect_true(is(out_svt, "SVT_SparseMatrix")) + expect_equal(out1, as.matrix(out_svt)) + + dm1 <- DelayedArray::DelayedArray(m) + out_dm1 <- PsiNorm(dm1) + expect_true(is(out_dm1, "DelayedMatrix")) + expect_equal(out1, as.matrix(out_dm1)) + + dm2 <- DelayedArray::DelayedArray(svt) + out_dm2 <- PsiNorm(dm2) + expect_true(is(out_dm2, "DelayedMatrix")) + expect_equal(out1, as.matrix(out_dm2)) + se <- SummarizedExperiment(m) sce <- SingleCellExperiment(m) - + out_se <- PsiNorm(se) expect_equal(out1, assay(out_se, "PsiNorm")) - + out_sce <- PsiNorm(sce, whichAssay = 1) out2 <- t(t(m)/sizeFactors(out_sce)) expect_equal(out1, out2)