Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: scone
Version: 1.31.0
Version: 1.31.1
Title: Single Cell Overview of Normalized Expression data
Description: SCONE is an R package for comparing and ranking the performance of
different normalization schemes for single-cell RNA-seq and other
Expand Down Expand Up @@ -44,7 +44,8 @@ Imports:
MatrixGenerics,
SingleCellExperiment,
DelayedMatrixStats,
sparseMatrixStats
sparseMatrixStats,
SparseArray (>= 1.7.6)
Suggests:
BiocStyle,
DT,
Expand All @@ -59,6 +60,7 @@ Suggests:
scRNAseq,
shiny,
testthat,
DelayedArray,
visNetwork,
doParallel,
batchtools,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,15 @@ import(SingleCellExperiment)
import(SummarizedExperiment)
import(gplots)
import(methods)
importClassesFrom(SparseArray,SparseMatrix)
importClassesFrom(SummarizedExperiment,SummarizedExperiment)
importFrom(DelayedMatrixStats,colMins)
importFrom(DelayedMatrixStats,colSums2)
importFrom(MatrixGenerics,colMins)
importFrom(MatrixGenerics,colSums2)
importFrom(RColorBrewer,brewer.pal)
importFrom(RUVSeq,RUVg)
importFrom(SparseArray,nzwhich)
importFrom(aroma.light,normalizeQuantileRank.matrix)
importFrom(boot,inv.logit)
importFrom(boot,logit)
Expand Down
5 changes: 3 additions & 2 deletions R/SCONE_DEFAULTS.R
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,8 @@ SCRAN_FN = function(ei){
#' eo <- PSINORM_FN(ei)
#'
PSINORM_FN = function(ei){
inv_sf <- pareto.MLE(ei+1)
eo = t(t(ei) / mean(inv_sf) * inv_sf)
tei <- t(ei)
sf <- compute_transposed_PsiNorm_sf(tei)
eo <- t(mat_v_div(tei, mean(1 / sf) * sf))
return(eo)
}
75 changes: 66 additions & 9 deletions R/pareto_norm.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,22 +54,79 @@ setMethod(
#' @export
#' @importFrom DelayedMatrixStats colSums2 colMins
#' @importFrom sparseMatrixStats colSums2 colMins
#' @importClassesFrom SparseArray SparseMatrix
#' @importFrom SparseArray nzwhich
setMethod(
f = "PsiNorm",
signature = signature(x = "ANY"),
definition = function(x){
inv_sf <- pareto.MLE(x+1)
t(t(x) * inv_sf)
## Uses only two transpostions (instead of four in original implementation).
## Preserves sparsity.
tx <- t(x)
sf <- compute_transposed_PsiNorm_sf(tx)
t(mat_v_div(tx, sf))
})

# can we simplify the function and assume min is always 1?
pareto.MLE <- function(sce) {
n <- nrow(sce)
m <- MatrixGenerics::colMins(sce)
a <- n/MatrixGenerics::colSums2(t(t(log(sce)) - log(m)))
return(a)
## Preserves sparsity.
compute_transposed_PsiNorm_sf <- function(tx) {
## Temporary workaround until operations from the Math group (e.g. log1p)
## are supported on a SparseMatrix object of type "integer".
## See https://github.com/Bioconductor/SparseArray/blob/devel/TODO
if (type(tx) != "double")
type(tx) <- "double"
x2 <- log1p(tx)
m2 <- MatrixGenerics::rowMins(x2)
x2 <- mat_v_sub(x2, m2)
MatrixGenerics::rowSums2(x2) / ncol(tx)
}

## Preserves sparsity.
computePsiNormSF <- function(x) {
1/pareto.MLE(x+1)
compute_transposed_PsiNorm_sf(t(x))
}

.build_linear_index <- function(nr, nc, i) {
stopifnot(is.integer(nr), length(nr) == 1L,
is.integer(nc), length(nc) == 1L, is.integer(i))
rep.int(nr * (seq_len(nc) - 1L), rep.int(length(i), nc)) + i
}

## Implements optimized substraction between a matrix-like object 'mat' and
## an ordinary vector 'v' with mostly zeros that accepts a SparseMatrix
## object (in which case it also returns a SparseMatrix object). Note that
## the SparseArray package does not support this operation out-of-the-box at
## the moment.
mat_v_sub <- function(mat, v) {
mat_dim <- dim(mat)
stopifnot(length(mat_dim) == 2L, is.vector(v), length(v) == mat_dim[[1L]])
nzidx <- nzwhich(v) # indices of nonzero values in 'v'
if (length(nzidx) == 0L)
return(mat) # no-op
if (!is(mat, "SparseMatrix"))
return(mat - v)
## Works well if 'nzidx' is short (i.e. 'v' contains mostly zeros).
Lidx <- .build_linear_index(mat_dim[[1L]], mat_dim[[2L]], nzidx)
mat[Lidx] <- mat[Lidx] - v[nzidx]
mat
}

## Implements division between a matrix-like object 'mat' and an ordinary
## vector 'v' with mostly nonzero/non-NA values that accepts a SparseMatrix
## object (in which case it also returns a SparseMatrix object). Note that
## the SparseArray package only supports this operation when 'v' contains
## no zeros or NAs at the moment.
mat_v_div <- function(mat, v) {
mat_dim <- dim(mat)
stopifnot(length(mat_dim) == 2L, is.vector(v), length(v) == mat_dim[[1L]])
idx <- which(is.na(v) | v == 0)
if (length(idx) == 0L || !is(mat, "SparseMatrix"))
return(mat / v)
## Works well if 'idx' is short (i.e. if 'v' contains very few zeros/NAs).
v2 <- v
v2[idx] <- 1L
mat <- mat / v2
Lidx <- .build_linear_index(mat_dim[[1L]], mat_dim[[2L]], idx)
mat[Lidx] <- mat[Lidx] / v[idx]
mat
}

29 changes: 24 additions & 5 deletions tests/testthat/test-pareto_norm.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,33 @@
test_that("PSiNorm works with all input classes", {
m <- matrix(c(1,0,2,0,2,9,3,0), ncol=2)

m <- matrix(0L, nrow=4, ncol=5)
m[c(1,3) , 1] <- 1:2
m[1:3, 3] <- c(2L, 9L, 3L)
m[ , 4] <- 2:1
m[2, 5] <- 1L

out1 <- PsiNorm(m)


svt <- as(m, "SVT_SparseMatrix")
out_svt <- PsiNorm(svt)
expect_true(is(out_svt, "SVT_SparseMatrix"))
expect_equal(out1, as.matrix(out_svt))

dm1 <- DelayedArray::DelayedArray(m)
out_dm1 <- PsiNorm(dm1)
expect_true(is(out_dm1, "DelayedMatrix"))
expect_equal(out1, as.matrix(out_dm1))

dm2 <- DelayedArray::DelayedArray(svt)
out_dm2 <- PsiNorm(dm2)
expect_true(is(out_dm2, "DelayedMatrix"))
expect_equal(out1, as.matrix(out_dm2))

se <- SummarizedExperiment(m)
sce <- SingleCellExperiment(m)

out_se <- PsiNorm(se)
expect_equal(out1, assay(out_se, "PsiNorm"))

out_sce <- PsiNorm(sce, whichAssay = 1)
out2 <- t(t(m)/sizeFactors(out_sce))
expect_equal(out1, out2)
Expand Down
Loading