diff --git a/.Rbuildignore b/.Rbuildignore
index 40e9f50..4fc5fa0 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -4,3 +4,5 @@
^README\.Rmd$
^\.github$
^codecov\.yml$
+^pkgdown$
+^README\.html$
diff --git a/DESCRIPTION b/DESCRIPTION
index e961c29..8331b9d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -15,17 +15,24 @@ BugReports: https://github.com/microbiome/daa/issues/
biocViews: Software, Microbiome
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
Depends:
- R (>= 4.5.0)
+ R (>= 4.5.0),
+ mia
Imports:
- dplyr
+ dplyr,
+ methods,
+ miaViz,
+ purrr,
+ rstatix,
+ S4Vectors,
+ SingleCellExperiment,
+ SummarizedExperiment
Suggests:
BiocStyle,
knitr,
- RefManageR,
+ MultiAssayExperiment,
rmarkdown,
- sessioninfo,
testthat
Config/testthat/edition: 3
VignetteBuilder: knitr
diff --git a/NAMESPACE b/NAMESPACE
index 6ae9268..e5bf65f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,2 +1,26 @@
# Generated by roxygen2: do not edit by hand
+export(addTtest)
+export(addWilcoxon)
+export(getTtest)
+export(getWilcoxon)
+exportMethods(addTtest)
+exportMethods(addWilcoxon)
+exportMethods(getTtest)
+exportMethods(getWilcoxon)
+importFrom(S4Vectors,DataFrame)
+importFrom(SingleCellExperiment,altExp)
+importFrom(SummarizedExperiment,assay)
+importFrom(SummarizedExperiment,colData)
+importFrom(SummarizedExperiment,rowData)
+importFrom(dplyr,across)
+importFrom(dplyr,all_of)
+importFrom(dplyr,arrange)
+importFrom(dplyr,group_split)
+importFrom(methods,callNextMethod)
+importFrom(mia,meltSE)
+importFrom(purrr,map_df)
+importFrom(rstatix,adjust_pvalue)
+importFrom(rstatix,t_test)
+importFrom(rstatix,wilcox_test)
+importFrom(stats,as.formula)
diff --git a/R/AllGenerics.R b/R/AllGenerics.R
new file mode 100644
index 0000000..2661b79
--- /dev/null
+++ b/R/AllGenerics.R
@@ -0,0 +1,25 @@
+# This file contains all the generics
+
+#' @rdname getWilcoxon
+#' @export
+setGeneric( "getWilcoxon", signature = "x", function ( x, ... ){
+ standardGeneric("getWilcoxon")
+})
+
+#' @rdname getWilcoxon
+#' @export
+setGeneric( "addWilcoxon", signature = "x", function ( x, ... ){
+ standardGeneric("addWilcoxon")
+})
+
+#' @rdname getTtest
+#' @export
+setGeneric( "getTtest", signature = "x", function ( x, ... ){
+ standardGeneric("getTtest")
+})
+
+#' @rdname getTtest
+#' @export
+setGeneric( "addTtest", signature = "x", function ( x, ... ){
+ standardGeneric("addTtest")
+})
diff --git a/R/getTtest.R b/R/getTtest.R
new file mode 100644
index 0000000..0455782
--- /dev/null
+++ b/R/getTtest.R
@@ -0,0 +1,149 @@
+#' @name
+#' getTtest
+#'
+#' @title
+#' Perform t-test
+#'
+#' @description
+#' These functions perform t-test to compare values between two groups.
+#'
+#' @details
+#' By default, Welch's t-test is used which does not assume equal variances.
+#' Set \code{var.equal = TRUE} for Student's t-test.
+#'
+#' Specify exactly one of \code{assay.type}, \code{row.var}, or \code{col.var}
+#' to define the values to test.
+#'
+#' @return
+#' \code{getTtest} returns a \code{DataFrame} with test results.
+#' \code{addTtest} returns \code{x} with results added to \code{rowData(x)}.
+#'
+#' @param x A \code{SummarizedExperiment} object.
+#'
+#' @param assay.type \code{Character scalar} or \code{NULL}. Specifies assay to
+#' test. Tests are run per feature. (Default: \code{NULL})
+#'
+#' @param row.var \code{Character scalar} or \code{NULL}. Specifies variable
+#' from \code{rowData(x)} to test. (Default: \code{NULL})
+#'
+#' @param col.var \code{Character scalar} or \code{NULL}. Specifies variable
+#' from \code{colData(x)} to test. (Default: \code{NULL})
+#'
+#' @param formula \code{formula}. A formula specifying the grouping variable,
+#' e.g., \code{~ SampleType}. The RHS specifies the comparison groups.
+#' For >2 levels, pairwise comparisons are performed.
+#'
+#' @param split.by \code{Character vector} or \code{NULL}. Columns to split by.
+#' Tests are run separately for each combination. (Default: \code{NULL})
+#'
+#' @param pair.by \code{Character scalar} or \code{NULL}. Column for pairing
+#' samples in paired tests. (Default: \code{NULL})
+#'
+#' @param features \code{Character vector} or \code{NULL}. Specific features
+#' to test when using \code{assay.type}. (Default: \code{NULL})
+#'
+#' @param var.equal \code{Logical scalar}. Assume equal variances?
+#' (Default: \code{FALSE})
+#'
+#' @param p.adjust.method \code{Character scalar}. Method for p-value
+#' adjustment. (Default: \code{"fdr"})
+#'
+#' @param name \code{Character scalar}. Column name prefix for results.
+#' (Default: \code{"ttest"})
+#'
+#' @param ... Additional arguments passed to \code{rstatix::t_test}.
+#'
+#' @examples
+#' data(GlobalPatterns, package = "mia")
+#' tse <- GlobalPatterns
+#' tse <- tse[1:50, tse$SampleType %in% c("Feces", "Skin")]
+#'
+#' # Test assay values (per feature)
+#' res <- getTtest(tse, assay.type = "counts", formula = ~SampleType)
+#'
+#' # Test colData variable (e.g., alpha diversity)
+#' tse <- mia::addAlpha(tse, index = "shannon_diversity")
+#' res <- getTtest(tse, col.var = "shannon_diversity", formula = ~SampleType)
+#'
+#' @seealso
+#' \code{\link[rstatix:t_test]{rstatix::t_test}},
+#' \code{\link[daa:getWilcoxon]{getWilcoxon}}
+#'
+#' @export
+NULL
+
+#' @rdname getTtest
+#' @export
+#' @importFrom SingleCellExperiment altExp
+#' @importFrom methods callNextMethod
+setMethod("getTtest", "SingleCellExperiment", function (x, ... ){
+ x <- .check_and_get_altExp(x, ...)
+ res <- callNextMethod(x, ...)
+ return(res)
+})
+
+#' @rdname getTtest
+#' @export
+#' @importFrom SummarizedExperiment assay colData rowData
+#' @importFrom rstatix t_test adjust_pvalue
+#' @importFrom S4Vectors DataFrame
+setMethod(
+ "getTtest", "SummarizedExperiment",
+ function (x, assay.type = NULL, row.var = NULL, col.var = NULL,
+ formula, split.by = NULL, pair.by = NULL, features = NULL,
+ var.equal = FALSE, p.adjust.method = "fdr", ... ){
+ ############################# Input check ##############################
+ group <- .check_input(
+ x, assay.type, row.var, col.var, formula,
+ split.by, pair.by, features
+ )
+ if( !.is_a_bool(var.equal) ){
+ stop("'var.equal' must be TRUE or FALSE.", call. = FALSE)
+ }
+ ########################### Input check end ############################
+ # Get y variable name (the column to test)
+ y <- c(assay.type, row.var, col.var)
+ # Get data based on source
+ df <- .get_data(
+ x, assay.type, row.var, col.var, group,
+ split.by, pair.by, features
+ )
+ # Run tests
+ paired <- !is.null(pair.by)
+ res <- .run_ttest(
+ df, y, group, split.by, paired, var.equal,
+ p.adjust.method, features, ...
+ )
+ return(res)
+ }
+)
+
+#' @rdname getTtest
+#' @export
+setMethod(
+ "addTtest", "SummarizedExperiment",
+ function (x, name = "ttest", ... ){
+ if( !.is_non_empty_string(name) ){
+ stop("'name' must be a single character value.", call. = FALSE)
+ }
+ res <- getTtest(x, ...)
+ x <- .add_values_to_colData(x, list(res$p.adj), paste0(name, "_padj"),
+ MARGIN = 1
+ )
+ return(x)
+ }
+)
+
+################################################################################
+# Internal function using .calc_daa engine
+################################################################################
+
+#' @importFrom rstatix t_test
+.run_ttest <- function (df, y, group, split.by, paired, var.equal,
+ p.adjust.method, features = NULL, ... ){
+ .calc_daa(
+ df = df, y = y, group = group, split.by = split.by,
+ paired = paired, FUN = rstatix::t_test,
+ p.adjust.method = p.adjust.method, features = features, var.equal = var.equal, ...
+ )
+}
diff --git a/R/getWilcoxon.R b/R/getWilcoxon.R
new file mode 100644
index 0000000..734248d
--- /dev/null
+++ b/R/getWilcoxon.R
@@ -0,0 +1,144 @@
+#' @name
+#' getWilcoxon
+#'
+#' @title
+#' Perform Wilcoxon rank-sum test
+#'
+#' @description
+#' These functions perform Wilcoxon rank-sum test (Mann-Whitney U) to compare
+#' values between two groups.
+#'
+#' @details
+#' The Wilcoxon rank-sum test is a non-parametric test used to compare two
+#' independent groups. It is suitable when data does not meet normality
+#' assumptions.
+#'
+#' Specify exactly one of \code{assay.type}, \code{row.var}, or \code{col.var}
+#' to define the values to test.
+#'
+#' @return
+#' \code{getWilcoxon} returns a \code{DataFrame} with test results.
+#' \code{addWilcoxon} returns \code{x} with results added to \code{rowData(x)}.
+#'
+#' @param x A \code{SummarizedExperiment} object.
+#'
+#' @param assay.type \code{Character scalar} or \code{NULL}. Specifies assay to
+#' test. Tests are run per feature. (Default: \code{NULL})
+#'
+#' @param row.var \code{Character scalar} or \code{NULL}. Specifies variable
+#' from \code{rowData(x)} to test. (Default: \code{NULL})
+#'
+#' @param col.var \code{Character scalar} or \code{NULL}. Specifies variable
+#' from \code{colData(x)} to test. (Default: \code{NULL})
+#'
+#' @param formula \code{formula}. A formula specifying the grouping variable,
+#' e.g., \code{~ SampleType}. The RHS specifies the comparison groups.
+#' For >2 levels, pairwise comparisons are performed.
+#'
+#' @param split.by \code{Character vector} or \code{NULL}. Columns to split by.
+#' Tests are run separately for each combination. (Default: \code{NULL})
+#'
+#' @param pair.by \code{Character scalar} or \code{NULL}. Column for pairing
+#' samples in paired tests. (Default: \code{NULL})
+#'
+#' @param features \code{Character vector} or \code{NULL}. Specific features
+#' to test when using \code{assay.type}. (Default: \code{NULL})
+#'
+#' @param p.adjust.method \code{Character scalar}. Method for p-value
+#' adjustment. (Default: \code{"fdr"})
+#'
+#' @param name \code{Character scalar}. Column name prefix for results.
+#' (Default: \code{"wilcoxon"})
+#'
+#' @param ... Additional arguments passed to \code{rstatix::wilcox_test}.
+#'
+#' @examples
+#' data(GlobalPatterns, package = "mia")
+#' tse <- GlobalPatterns
+#' tse <- tse[1:50, tse$SampleType %in% c("Feces", "Skin")]
+#'
+#' # Test assay values (per feature)
+#' res <- getWilcoxon(tse, assay.type = "counts", formula = ~SampleType)
+#'
+#' # Test colData variable (e.g., alpha diversity)
+#' tse <- mia::addAlpha(tse, index = "shannon_diversity")
+#' res <- getWilcoxon(tse, col.var = "shannon_diversity", formula = ~SampleType)
+#'
+#' @seealso
+#' \code{\link[rstatix:wilcox_test]{rstatix::wilcox_test}},
+#' \code{\link[daa:getTtest]{getTtest}}
+#'
+#' @export
+NULL
+
+#' @rdname getWilcoxon
+#' @export
+#' @importFrom SingleCellExperiment altExp
+#' @importFrom methods callNextMethod
+setMethod("getWilcoxon", "SingleCellExperiment", function (x, ... ){
+ x <- .check_and_get_altExp(x, ...)
+ res <- callNextMethod(x, ...)
+ return(res)
+})
+
+#' @rdname getWilcoxon
+#' @export
+#' @importFrom SummarizedExperiment assay colData rowData
+#' @importFrom rstatix wilcox_test adjust_pvalue
+#' @importFrom S4Vectors DataFrame
+setMethod(
+ "getWilcoxon", "SummarizedExperiment",
+ function (x, assay.type = NULL, row.var = NULL, col.var = NULL,
+ formula, split.by = NULL, pair.by = NULL, features = NULL,
+ p.adjust.method = "fdr", ... ){
+ ############################# Input check ##############################
+ group <- .check_input(
+ x, assay.type, row.var, col.var, formula,
+ split.by, pair.by, features
+ )
+ ########################### Input check end ############################
+ # Get y variable name (the column to test)
+ y <- c(assay.type, row.var, col.var)
+ # Get data based on source
+ df <- .get_data(
+ x, assay.type, row.var, col.var, group,
+ split.by, pair.by, features
+ )
+ # Run tests
+ paired <- !is.null(pair.by)
+ res <- .run_wilcoxon(
+ df, y, group, split.by, paired,
+ p.adjust.method, features, ...
+ )
+ return(res)
+ }
+)
+
+#' @rdname getWilcoxon
+#' @export
+setMethod(
+ "addWilcoxon", "SummarizedExperiment",
+ function (x, name = "wilcoxon", ... ){
+ if( !.is_non_empty_string(name) ){
+ stop("'name' must be a single character value.", call. = FALSE)
+ }
+ res <- getWilcoxon(x, ...)
+ x <- .add_values_to_colData(x, list(res$p.adj), paste0(name, "_padj"),
+ MARGIN = 1
+ )
+ return(x)
+ }
+)
+
+################################################################################
+# Internal function using .calc_daa engine
+################################################################################
+
+#' @importFrom rstatix wilcox_test
+.run_wilcoxon <- function (df, y, group, split.by, paired, p.adjust.method, features = NULL, ... ){
+ .calc_daa(
+ df = df, y = y, group = group, split.by = split.by,
+ paired = paired, FUN = rstatix::wilcox_test,
+ p.adjust.method = p.adjust.method, features = features, ...
+ )
+}
diff --git a/R/utils.R b/R/utils.R
new file mode 100644
index 0000000..4c6f4da
--- /dev/null
+++ b/R/utils.R
@@ -0,0 +1,306 @@
+################################################################################
+# Internal methods imported from mia
+################################################################################
+
+.is_a_bool <- mia:::.is_a_bool
+.is_non_empty_string <- mia:::.is_non_empty_string
+.is_non_empty_character <- mia:::.is_non_empty_character
+.check_assay_present <- mia:::.check_assay_present
+.check_and_get_altExp <- mia:::.check_and_get_altExp
+.add_values_to_colData <- mia:::.add_values_to_colData
+
+################################################################################
+# Internal methods imported from miaViz
+################################################################################
+
+.check_metadata_variable <- miaViz:::.check_metadata_variable
+
+################################################################################
+# Input validation - matching plotBoxplot's pattern
+################################################################################
+
+#' Check inputs for statistical tests
+#' @keywords internal
+#' @noRd
+#' @importFrom SummarizedExperiment colData rowData
+.check_input <- function (x, assay.type, row.var, col.var, formula,
+ split.by, pair.by, features ){
+ # Either assay.type, row.var or col.var must be specified
+ if( sum(c(is.null(assay.type), is.null(row.var), is.null(col.var))) != 2L ){
+ stop("Please specify either 'assay.type', 'row.var', or 'col.var'.",
+ call. = FALSE
+ )
+ }
+ # features cannot be specified if row.var or col.var is specified
+ if( is.null(assay.type) && !is.null(features) ){
+ stop("'features' can only be specified when 'assay.type' is specified.",
+ call. = FALSE
+ )
+ }
+ # As features points to rownames, the TSE must have rownames
+ if( !is.null(features) && is.null(rownames(x)) ){
+ stop("'x' must have rownames.", call. = FALSE)
+ }
+ if( !(is.null(features) ||
+ (is.character(features) && all(features %in% rownames(x)))) ){
+ stop("'features' must be NULL or character vector specifying rownames.",
+ call. = FALSE
+ )
+ }
+ # If assay was specified, check that it is correct
+ if( !is.null(assay.type) ){
+ .check_assay_present(assay.type, x)
+ }
+ # Check row.var exists in rowData
+ if( !is.null(row.var) ){
+ if( !.is_non_empty_string(row.var) ){
+ stop("'row.var' must be a single character value.", call. = FALSE)
+ }
+ if( !row.var %in% colnames(rowData(x)) ){
+ stop("'", row.var, "' not found in rowData(x).", call. = FALSE)
+ }
+ }
+ # Check col.var exists in colData
+ if( !is.null(col.var) ){
+ if( !.is_non_empty_string(col.var) ){
+ stop("'col.var' must be a single character value.", call. = FALSE)
+ }
+ if( !col.var %in% colnames(colData(x)) ){
+ stop("'", col.var, "' not found in colData(x).", call. = FALSE)
+ }
+ }
+ # Check formula
+ group <- .get_rhs_var(formula)
+ # Check group variable exists in appropriate metadata
+ .check_metadata_var(x, group, assay.type, row.var, col.var)
+ # Check split.by variables
+ if( !is.null(split.by) ){
+ if( !is.character(split.by) ){
+ stop("'split.by' must be a character vector.", call. = FALSE)
+ }
+ for( var in split.by ){
+ .check_metadata_var(x, var, assay.type, row.var, col.var)
+ }
+ }
+ # Check pair.by variable
+ if( !is.null(pair.by) ){
+ if( !.is_non_empty_string(pair.by) ){
+ stop("'pair.by' must be a single character value.", call. = FALSE)
+ }
+ .check_metadata_var(x, pair.by, assay.type, row.var, col.var)
+ }
+ # Return group for use in caller
+ group
+}
+
+#' Check metadata variable exists in appropriate location
+#' @keywords internal
+#' @noRd
+#' @importFrom SummarizedExperiment colData rowData
+.check_metadata_var <- function (x, var, assay.type, row.var, col.var ){
+ # When row.var is used, check rowData
+ # When assay.type or col.var is used, check colData
+ .check_metadata_variable(
+ tse = x,
+ var = var,
+ row = !is.null(row.var),
+ col = !is.null(assay.type) || !is.null(col.var)
+ )
+ invisible(NULL)
+}
+
+#' Extract RHS variable from formula
+#' @keywords internal
+#' @noRd
+.get_rhs_var <- function (formula ){
+ if( !inherits(formula, "formula") ){
+ stop("'formula' must be a formula.", call. = FALSE)
+ }
+ # Get RHS of formula (handles both ~ group and value ~ group)
+ rhs <- as.character(formula)[length(as.character(formula))]
+ if( length(rhs) != 1 || rhs == "" ){
+ stop("Formula must specify a grouping variable.", call. = FALSE)
+ }
+ rhs
+}
+
+#' Check group has at least 2 levels
+#' @keywords internal
+#' @noRd
+.check_group <- function (df, group ){
+ if( !group %in% names(df) ){
+ stop("'", group, "' not found in data.", call. = FALSE)
+ }
+ vals <- unique(df[[group]])
+ vals <- vals[!is.na(vals)]
+ if( length(vals) < 2 ){
+ stop("'group' must have at least 2 levels. Found ", length(vals), ".",
+ call. = FALSE
+ )
+ }
+ invisible(NULL)
+}
+
+################################################################################
+# Data extraction - matching plotBoxplot's approach
+################################################################################
+
+#' Get data for testing based on source
+#' @keywords internal
+#' @noRd
+#' @importFrom SummarizedExperiment colData rowData
+#' @importFrom mia meltSE
+.get_data <- function (x, assay.type, row.var, col.var,
+ group, split.by, pair.by, features ){
+ # Collect all variable names needed
+ all_vars <- c(group, split.by, pair.by)
+ all_vars <- unique(all_vars[!sapply(all_vars, is.null)])
+
+ # Get data based on source - matching getDA's smart routing pattern
+ if( !is.null(assay.type) ){
+ # Automatically detect which variables are in rowData vs colData
+ row_vars <- vapply(all_vars, function (v ){
+ v %in% colnames(rowData(x))
+ }, logical(1L))
+ col_vars <- all_vars[!row_vars]
+ row_vars <- all_vars[row_vars]
+ # Use meltSE with smart routing
+ df <- meltSE(
+ x,
+ assay.type = assay.type,
+ add.col = c(col.var, pair.by, col_vars),
+ add.row = c(row.var, row_vars)
+ )
+ }
+ # If row.var was specified, get the data from rowData
+ if( !is.null(row.var) ){
+ df <- rowData(x)[, c(row.var, all_vars), drop = FALSE]
+ }
+ # If col.var was specified, get the data from colData
+ if( !is.null(col.var) ){
+ df <- colData(x)[, c(col.var, pair.by, all_vars), drop = FALSE]
+ }
+
+ df <- as.data.frame(df)
+ attr(df, "pair.by") <- pair.by
+ .check_group(df, group)
+
+ # Validate dependent variable is numeric
+ y_var <- if( !is.null(assay.type)) assay.type else if( !is.null(row.var)) row.var else col.var
+ if( !is.numeric(df[[y_var]]) ){
+ stop("The dependent variable must be numeric.", call. = FALSE)
+ }
+
+ df
+}
+
+################################################################################
+# Universal DAA Engine
+################################################################################
+
+#' Unified engine for differential abundance analysis
+#' @keywords internal
+#' @noRd
+#' @importFrom rstatix adjust_pvalue
+#' @importFrom S4Vectors DataFrame
+#' @importFrom dplyr across all_of group_split arrange
+#' @importFrom purrr map_df
+#' @importFrom stats as.formula
+.calc_daa <- function (df, y, group, split.by, paired, FUN,
+ p.adjust.method = "fdr", features = NULL, ... ){
+ # Identify pairing variable
+ pair.by <- attr(df, "pair.by")
+
+ # Combine grouping vars: FeatureID (from meltSE) + split.by
+ grouping_vars <- c(split.by)
+ if( "FeatureID" %in% names(df) ){
+ grouping_vars <- c("FeatureID", grouping_vars)
+ }
+
+ # Build formula: y ~ group
+ formula <- as.formula(paste0(y, " ~ ", group))
+
+ # Run tests per group (FeatureID + split.by)
+ if( length(grouping_vars) > 0 ){
+ res <- df |>
+ group_split(across(all_of(grouping_vars))) |>
+ map_df(function (sub_df ){
+ # Ensure correct alignment for paired tests
+ if( paired && !is.null(pair.by) ){
+ sub_df <- sub_df |> arrange(across(all_of(pair.by)))
+ }
+
+ # Run test with error handling
+ test_res <- tryCatch(
+ {
+ FUN(sub_df, formula, paired = paired, ...)
+ },
+ error = function (e ){
+ # Print warning to terminal
+ msg <- conditionMessage(e)
+ warning("Statistical test failed for a feature: ", msg,
+ call. = FALSE
+ )
+ # Create empty/NA result with error message
+ out <- data.frame(
+ .y. = y,
+ group1 = NA_character_,
+ group2 = NA_character_,
+ n1 = NA_integer_,
+ n2 = NA_integer_,
+ statistic = NA_real_,
+ p = NA_real_,
+ warning = msg
+ )
+ return(out)
+ }
+ )
+
+ # Attach grouping info
+ group_info <- sub_df[1, grouping_vars, drop = FALSE]
+ res_with_groups <- cbind(test_res, group_info)
+ return(res_with_groups)
+ })
+ } else {
+ # Single test (no FeatureID/split.by)
+ if( paired && !is.null(pair.by) ){
+ df <- df |> arrange(across(all_of(pair.by)))
+ }
+
+ res <- tryCatch(
+ {
+ FUN(df, formula, paired = paired, ...)
+ },
+ error = function (e ){
+ msg <- conditionMessage(e)
+ warning("Statistical test failed: ", msg, call. = FALSE)
+ out <- data.frame(
+ .y. = y,
+ group1 = NA_character_,
+ group2 = NA_character_,
+ n1 = NA_integer_,
+ n2 = NA_integer_,
+ statistic = NA_real_,
+ p = NA_real_,
+ warning = msg
+ )
+ return(out)
+ }
+ )
+ }
+
+ # P-value adjustment
+ if( "p" %in% colnames(res) && any(!is.na(res$p)) ){
+ res <- res |> adjust_pvalue(method = p.adjust.method)
+ } else {
+ res$p.adj <- NA_real_
+ }
+
+ # Subset to relevant features AFTER statistical calculations
+ if( !is.null(features) && "FeatureID" %in% colnames(res) ){
+ res <- res[res$FeatureID %in% features, , drop = FALSE]
+ }
+
+ res <- DataFrame(res)
+ return(res)
+}
diff --git a/README.Rmd b/README.Rmd
index 5f5b812..6e5340d 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -13,31 +13,31 @@ knitr::opts_chunk$set(
)
```
-# daa
+# daa - Differential Abundance Analysis
-[](https://github.com/microbiome/daa/issues)
-[](https://github.com/microbiome/daa/pulls)
[](https://lifecycle.r-lib.org/articles/stages.html#experimental)
-[](https://bioconductor.org/checkResults/release/bioc-LATEST/daa)
-[](https://bioconductor.org/checkResults/devel/bioc-LATEST/daa)
-[](http://bioconductor.org/packages/stats/bioc/daa/)
-[](https://support.bioconductor.org/tag/daa)
-[](https://bioconductor.org/packages/release/bioc/html/daa.html#since)
-[](http://bioconductor.org/checkResults/devel/bioc-LATEST/daa/)
-[](https://bioconductor.org/packages/release/bioc/html/daa.html#since)
-[](https://github.com/microbiome/daa/actions/workflows/check-bioc.yml)
+[](http://bioconductor.org/packages/release/bioc/html/daa.html)
[](https://app.codecov.io/gh/microbiome/daa)
-The goal of `daa` is to provide method for differential abundance analysis
-(DAA).
+## Using the package
-## Installation instructions
+This package provides functions for differential abundance analysis (DAA)
+of microbiome data. It works with `TreeSummarizedExperiment` and
+`SummarizedExperiment` objects from the miaverse ecosystem.
-Get the latest stable `R` release from [CRAN](http://cran.r-project.org/). Then
-install `daa` from [Bioconductor](http://bioconductor.org/) using the following
-code:
+**Available methods:**
+
+- `getWilcoxon()` / `addWilcoxon()` - Wilcoxon rank-sum test
+- `getTtest()` / `addTtest()` - t-test (Welch's or Student's)
+
+For more information, see the [function reference](https://microbiome.github.io/daa/reference/index.html)
+and the [Orchestrating Microbiome Analysis (OMA)](https://microbiome.github.io/OMA) online book.
+
+## Installation
+
+### Bioc-release
```{r 'install', eval = FALSE}
if (!requireNamespace("BiocManager", quietly = TRUE)) {
@@ -47,65 +47,24 @@ if (!requireNamespace("BiocManager", quietly = TRUE)) {
BiocManager::install("daa")
```
-And the development version from [GitHub](https://github.com/microbiome/daa)
-with:
+### Bioc-devel
```{r 'install_dev', eval = FALSE}
-BiocManager::install("microbiome/daa")
-```
-
-## Example
-
-This is a basic example which shows you how to solve a common problem:
-
-```{r example, eval = requireNamespace('daa')}
-library("daa")
-## basic example code
-```
-
-What is special about using `README.Rmd` instead of just `README.md`? You can include R chunks like so:
-
-```{r cars}
-summary(cars)
-```
-
-You'll still need to render `README.Rmd` regularly, to keep `README.md` up-to-date.
-
-You can also embed plots, for example:
+if (!requireNamespace("BiocManager", quietly = TRUE)) {
+ install.packages("BiocManager")
+}
-```{r pressure, echo = FALSE}
-plot(pressure)
+BiocManager::install(version = "devel")
+BiocManager::install("daa")
```
-In that case, don't forget to commit and push the resulting figure files, so they display on GitHub!
-
-## Citation
+## Contributing
-Below is the citation output from using `citation('daa')` in R. Please
-run this yourself to check for any updates on how to cite __daa__.
-
-```{r 'citation', eval = requireNamespace('daa')}
-print(citation("daa"), bibtex = TRUE)
-```
-
-Please note that the `daa` was only made possible thanks to many other R and
-bioinformatics software authors, which are cited either in the vignettes and/or
-the paper(s) describing this package.
+Contributions are welcome in the form of feedback, issues, and pull requests.
+See [contributor guidelines](CONTRIBUTING.md).
## Code of Conduct
-Please note that the `daa` project is released with a
-[Contributor Code of Conduct](http://bioconductor.org/about/code-of-conduct/)
+Please note that the daa project is released with a
+[Contributor Code of Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html).
By contributing to this project, you agree to abide by its terms.
-
-## Development tools
-
-* Continuous code testing is possible thanks to [GitHub actions](https://www.tidyverse.org/blog/2020/04/usethis-1-6-0/) through `r BiocStyle::CRANpkg('usethis')`, `r BiocStyle::CRANpkg('remotes')`, and `r BiocStyle::CRANpkg('rcmdcheck')` customized to use [Bioconductor's docker containers](https://www.bioconductor.org/help/docker/) and `r BiocStyle::Biocpkg('BiocCheck')`.
-* Code coverage assessment is possible thanks to [codecov](https://codecov.io/gh) and `r BiocStyle::CRANpkg('covr')`.
-* The [documentation website](http://microbiome.github.io/daa) is automatically updated thanks to `r BiocStyle::CRANpkg('pkgdown')`.
-* The code is styled automatically thanks to `r BiocStyle::CRANpkg('styler')`.
-* The documentation is formatted thanks to `r BiocStyle::CRANpkg('devtools')` and `r BiocStyle::CRANpkg('roxygen2')`.
-
-For more details, check the `dev` directory.
-
-This package was developed using `r BiocStyle::Biocpkg('biocthis')`.
diff --git a/README.md b/README.md
index 6bfef92..b4bc61f 100644
--- a/README.md
+++ b/README.md
@@ -1,43 +1,36 @@
-# daa
+# daa - Differential Abundance Analysis
-[](https://github.com/microbiome/daa/issues)
-[](https://github.com/microbiome/daa/pulls)
[](https://lifecycle.r-lib.org/articles/stages.html#experimental)
-[](https://bioconductor.org/checkResults/release/bioc-LATEST/daa)
-[](https://bioconductor.org/checkResults/devel/bioc-LATEST/daa)
-[](http://bioconductor.org/packages/stats/bioc/daa/)
-[](https://support.bioconductor.org/tag/daa)
-[](https://bioconductor.org/packages/release/bioc/html/daa.html#since)
-[](http://bioconductor.org/checkResults/devel/bioc-LATEST/daa/)
-[](https://bioconductor.org/packages/release/bioc/html/daa.html#since)
-[](https://github.com/microbiome/daa/actions/workflows/check-bioc.yml)
+[](http://bioconductor.org/packages/release/bioc/html/daa.html)
[](https://app.codecov.io/gh/microbiome/daa)
-The goal of `daa` is to provide method for differential abundance
-analysis (DAA).
+## Using the package
-## Installation instructions
+This package provides functions for differential abundance analysis
+(DAA) of microbiome data. It works with `TreeSummarizedExperiment` and
+`SummarizedExperiment` objects from the miaverse ecosystem.
-Get the latest stable `R` release from
-[CRAN](http://cran.r-project.org/). Then install `daa` from
-[Bioconductor](http://bioconductor.org/) using the following code:
+**Available methods:**
+
+- `getWilcoxon()` / `addWilcoxon()` - Wilcoxon rank-sum test
+- `getTtest()` / `addTtest()` - t-test (Welch’s or Student’s)
+
+For more information, see the [function
+reference](https://microbiome.github.io/daa/reference/index.html) and
+the [Orchestrating Microbiome Analysis
+(OMA)](https://microbiome.github.io/OMA) online book.
+
+## Installation
+
+### Bioc-release
``` r
if (!requireNamespace("BiocManager", quietly = TRUE)) {
@@ -47,103 +40,24 @@ if (!requireNamespace("BiocManager", quietly = TRUE)) {
BiocManager::install("daa")
```
-And the development version from
-[GitHub](https://github.com/microbiome/daa) with:
-
-``` r
-BiocManager::install("microbiome/daa")
-```
-
-## Example
-
-This is a basic example which shows you how to solve a common problem:
+### Bioc-devel
``` r
-library("daa")
-## basic example code
-```
-
-What is special about using `README.Rmd` instead of just `README.md`?
-You can include R chunks like so:
+if (!requireNamespace("BiocManager", quietly = TRUE)) {
+ install.packages("BiocManager")
+}
-``` r
-summary(cars)
-#> speed dist
-#> Min. : 4.0 Min. : 2.00
-#> 1st Qu.:12.0 1st Qu.: 26.00
-#> Median :15.0 Median : 36.00
-#> Mean :15.4 Mean : 42.98
-#> 3rd Qu.:19.0 3rd Qu.: 56.00
-#> Max. :25.0 Max. :120.00
+BiocManager::install(version = "devel")
+BiocManager::install("daa")
```
-You’ll still need to render `README.Rmd` regularly, to keep `README.md`
-up-to-date.
-
-You can also embed plots, for example:
-
-
-
-In that case, don’t forget to commit and push the resulting figure
-files, so they display on GitHub!
-
-## Citation
-
-Below is the citation output from using `citation('daa')` in R. Please
-run this yourself to check for any updates on how to cite **daa**.
-
-``` r
-print(citation("daa"), bibtex = TRUE)
-#> To cite package 'daa' in publications use:
-#>
-#> Borman T, Lahti L, Muluh M (2025). _daa: Differential abundance
-#> analysis_. R package version 0.99.0,
-#> .
-#>
-#> A BibTeX entry for LaTeX users is
-#>
-#> @Manual{,
-#> title = {daa: Differential abundance analysis},
-#> author = {Tuomas Borman and Leo Lahti and Muluh Muluh},
-#> year = {2025},
-#> note = {R package version 0.99.0},
-#> url = {https://github.com/microbiome/daa},
-#> }
-```
+## Contributing
-Please note that the `daa` was only made possible thanks to many other R
-and bioinformatics software authors, which are cited either in the
-vignettes and/or the paper(s) describing this package.
+Contributions are welcome in the form of feedback, issues, and pull
+requests. See [contributor guidelines](CONTRIBUTING.md).
## Code of Conduct
-Please note that the `daa` project is released with a [Contributor Code
-of Conduct](http://bioconductor.org/about/code-of-conduct/) By
-contributing to this project, you agree to abide by its terms.
-
-## Development tools
-
-- Continuous code testing is possible thanks to [GitHub
- actions](https://www.tidyverse.org/blog/2020/04/usethis-1-6-0/)
- through *[usethis](https://CRAN.R-project.org/package=usethis)*,
- *[remotes](https://CRAN.R-project.org/package=remotes)*, and
- *[rcmdcheck](https://CRAN.R-project.org/package=rcmdcheck)* customized
- to use [Bioconductor’s docker
- containers](https://www.bioconductor.org/help/docker/) and
- *[BiocCheck](https://bioconductor.org/packages/3.22/BiocCheck)*.
-- Code coverage assessment is possible thanks to
- [codecov](https://codecov.io/gh) and
- *[covr](https://CRAN.R-project.org/package=covr)*.
-- The [documentation website](http://microbiome.github.io/daa) is
- automatically updated thanks to
- *[pkgdown](https://CRAN.R-project.org/package=pkgdown)*.
-- The code is styled automatically thanks to
- *[styler](https://CRAN.R-project.org/package=styler)*.
-- The documentation is formatted thanks to
- *[devtools](https://CRAN.R-project.org/package=devtools)* and
- *[roxygen2](https://CRAN.R-project.org/package=roxygen2)*.
-
-For more details, check the `dev` directory.
-
-This package was developed using
-*[biocthis](https://bioconductor.org/packages/3.22/biocthis)*.
+Please note that the daa project is released with a [Contributor Code of
+Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html).
+By contributing to this project, you agree to abide by its terms.
diff --git a/man/figures/README-pressure-1.png b/man/figures/README-pressure-1.png
deleted file mode 100644
index e208311..0000000
Binary files a/man/figures/README-pressure-1.png and /dev/null differ
diff --git a/man/getTtest.Rd b/man/getTtest.Rd
new file mode 100644
index 0000000..1553598
--- /dev/null
+++ b/man/getTtest.Rd
@@ -0,0 +1,99 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/AllGenerics.R, R/getTtest.R
+\name{getTtest}
+\alias{getTtest}
+\alias{addTtest}
+\alias{getTtest,SingleCellExperiment-method}
+\alias{getTtest,SummarizedExperiment-method}
+\alias{addTtest,SummarizedExperiment-method}
+\title{Perform t-test}
+\usage{
+getTtest(x, ...)
+
+addTtest(x, ...)
+
+\S4method{getTtest}{SingleCellExperiment}(x, ...)
+
+\S4method{getTtest}{SummarizedExperiment}(
+ x,
+ assay.type = NULL,
+ row.var = NULL,
+ col.var = NULL,
+ formula,
+ split.by = NULL,
+ pair.by = NULL,
+ features = NULL,
+ var.equal = FALSE,
+ p.adjust.method = "fdr",
+ ...
+)
+
+\S4method{addTtest}{SummarizedExperiment}(x, name = "ttest", ...)
+}
+\arguments{
+\item{x}{A \code{SummarizedExperiment} object.}
+
+\item{...}{Additional arguments passed to \code{rstatix::t_test}.}
+
+\item{assay.type}{\code{Character scalar} or \code{NULL}. Specifies assay to
+test. Tests are run per feature. (Default: \code{NULL} )}
+
+\item{row.var}{\code{Character scalar} or \code{NULL}. Specifies variable
+from \code{rowData(x)} to test. (Default: \code{NULL} )}
+
+\item{col.var}{\code{Character scalar} or \code{NULL}. Specifies variable
+from \code{colData(x)} to test. (Default: \code{NULL} )}
+
+\item{formula}{\code{formula}. A formula specifying the grouping variable,
+e.g., \code{~ SampleType}. The RHS specifies the comparison groups.
+For >2 levels, pairwise comparisons are performed.}
+
+\item{split.by}{\code{Character vector} or \code{NULL}. Columns to split by.
+Tests are run separately for each combination. (Default: \code{NULL} )}
+
+\item{pair.by}{\code{Character scalar} or \code{NULL}. Column for pairing
+samples in paired tests. (Default: \code{NULL} )}
+
+\item{features}{\code{Character vector} or \code{NULL}. Specific features
+to test when using \code{assay.type}. (Default: \code{NULL} )}
+
+\item{var.equal}{\code{Logical scalar}. Assume equal variances?
+(Default: \code{FALSE} )}
+
+\item{p.adjust.method}{\code{Character scalar}. Method for p-value
+adjustment. (Default: \code{"fdr"} )}
+
+\item{name}{\code{Character scalar}. Column name prefix for results.
+(Default: \code{"ttest"} )}
+}
+\value{
+\code{getTtest} returns a \code{DataFrame} with test results.
+\code{addTtest} returns \code{x} with results added to \code{rowData(x)}.
+}
+\description{
+These functions perform t-test to compare values between two groups.
+}
+\details{
+By default, Welch's t-test is used which does not assume equal variances.
+Set \code{var.equal = TRUE} for Student's t-test.
+
+Specify exactly one of \code{assay.type}, \code{row.var}, or \code{col.var}
+to define the values to test.
+}
+\examples{
+data(GlobalPatterns, package = "mia")
+tse <- GlobalPatterns
+tse <- tse[1:50, tse$SampleType \%in\% c("Feces", "Skin")]
+
+# Test assay values (per feature)
+res <- getTtest(tse, assay.type = "counts", formula = ~SampleType)
+
+# Test colData variable (e.g., alpha diversity)
+tse <- mia::addAlpha(tse, index = "shannon_diversity")
+res <- getTtest(tse, col.var = "shannon_diversity", formula = ~SampleType)
+
+}
+\seealso{
+\code{\link[rstatix:t_test]{rstatix::t_test}},
+\code{\link[daa:getWilcoxon]{getWilcoxon}}
+}
diff --git a/man/getWilcoxon.Rd b/man/getWilcoxon.Rd
new file mode 100644
index 0000000..964dd32
--- /dev/null
+++ b/man/getWilcoxon.Rd
@@ -0,0 +1,97 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/AllGenerics.R, R/getWilcoxon.R
+\name{getWilcoxon}
+\alias{getWilcoxon}
+\alias{addWilcoxon}
+\alias{getWilcoxon,SingleCellExperiment-method}
+\alias{getWilcoxon,SummarizedExperiment-method}
+\alias{addWilcoxon,SummarizedExperiment-method}
+\title{Perform Wilcoxon rank-sum test}
+\usage{
+getWilcoxon(x, ...)
+
+addWilcoxon(x, ...)
+
+\S4method{getWilcoxon}{SingleCellExperiment}(x, ...)
+
+\S4method{getWilcoxon}{SummarizedExperiment}(
+ x,
+ assay.type = NULL,
+ row.var = NULL,
+ col.var = NULL,
+ formula,
+ split.by = NULL,
+ pair.by = NULL,
+ features = NULL,
+ p.adjust.method = "fdr",
+ ...
+)
+
+\S4method{addWilcoxon}{SummarizedExperiment}(x, name = "wilcoxon", ...)
+}
+\arguments{
+\item{x}{A \code{SummarizedExperiment} object.}
+
+\item{...}{Additional arguments passed to \code{rstatix::wilcox_test}.}
+
+\item{assay.type}{\code{Character scalar} or \code{NULL}. Specifies assay to
+test. Tests are run per feature. (Default: \code{NULL} )}
+
+\item{row.var}{\code{Character scalar} or \code{NULL}. Specifies variable
+from \code{rowData(x)} to test. (Default: \code{NULL} )}
+
+\item{col.var}{\code{Character scalar} or \code{NULL}. Specifies variable
+from \code{colData(x)} to test. (Default: \code{NULL} )}
+
+\item{formula}{\code{formula}. A formula specifying the grouping variable,
+e.g., \code{~ SampleType}. The RHS specifies the comparison groups.
+For >2 levels, pairwise comparisons are performed.}
+
+\item{split.by}{\code{Character vector} or \code{NULL}. Columns to split by.
+Tests are run separately for each combination. (Default: \code{NULL} )}
+
+\item{pair.by}{\code{Character scalar} or \code{NULL}. Column for pairing
+samples in paired tests. (Default: \code{NULL} )}
+
+\item{features}{\code{Character vector} or \code{NULL}. Specific features
+to test when using \code{assay.type}. (Default: \code{NULL} )}
+
+\item{p.adjust.method}{\code{Character scalar}. Method for p-value
+adjustment. (Default: \code{"fdr"} )}
+
+\item{name}{\code{Character scalar}. Column name prefix for results.
+(Default: \code{"wilcoxon"} )}
+}
+\value{
+\code{getWilcoxon} returns a \code{DataFrame} with test results.
+\code{addWilcoxon} returns \code{x} with results added to \code{rowData(x)}.
+}
+\description{
+These functions perform Wilcoxon rank-sum test (Mann-Whitney U) to compare
+values between two groups.
+}
+\details{
+The Wilcoxon rank-sum test is a non-parametric test used to compare two
+independent groups. It is suitable when data does not meet normality
+assumptions.
+
+Specify exactly one of \code{assay.type}, \code{row.var}, or \code{col.var}
+to define the values to test.
+}
+\examples{
+data(GlobalPatterns, package = "mia")
+tse <- GlobalPatterns
+tse <- tse[1:50, tse$SampleType \%in\% c("Feces", "Skin")]
+
+# Test assay values (per feature)
+res <- getWilcoxon(tse, assay.type = "counts", formula = ~SampleType)
+
+# Test colData variable (e.g., alpha diversity)
+tse <- mia::addAlpha(tse, index = "shannon_diversity")
+res <- getWilcoxon(tse, col.var = "shannon_diversity", formula = ~SampleType)
+
+}
+\seealso{
+\code{\link[rstatix:wilcox_test]{rstatix::wilcox_test}},
+\code{\link[daa:getTtest]{getTtest}}
+}
diff --git a/tests/testthat.R b/tests/testthat.R
index 54620d4..f2759bb 100644
--- a/tests/testthat.R
+++ b/tests/testthat.R
@@ -1,10 +1,9 @@
# This file is part of the standard setup for testthat.
# It is recommended that you do not modify it.
#
-# Where should you do additional test configuration?
-# Learn more about the roles of various files in:
-# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
-# * https://testthat.r-lib.org/articles/special-files.html
+# Where should you do your testing?
+# Learn more about the recommended testthat workflow here:
+# https://r-pkgs.org/testing-design.html#sec-tests-files-overview
library(testthat)
library(daa)
diff --git a/tests/testthat/test-example_test.R b/tests/testthat/test-example_test.R
deleted file mode 100644
index 94b301e..0000000
--- a/tests/testthat/test-example_test.R
+++ /dev/null
@@ -1,3 +0,0 @@
-test_that("multiplication works", {
- expect_equal(2 * 2, 4)
-})
diff --git a/tests/testthat/test-getTtest.R b/tests/testthat/test-getTtest.R
new file mode 100644
index 0000000..c2e7b7f
--- /dev/null
+++ b/tests/testthat/test-getTtest.R
@@ -0,0 +1,144 @@
+# Tests for getTtest
+
+# Use HintikkaXOData - has good variance for statistical tests
+# Need getWithColData to get colData from MAE
+data(HintikkaXOData, package = "mia")
+mae <- HintikkaXOData
+tse <- MultiAssayExperiment::getWithColData(mae, "microbiota")
+tse <- mia::transformAssay(tse, method = "relabundance")
+tse <- tse[1:10, ]
+
+################################################################################
+# assay.type tests (per-feature)
+################################################################################
+
+test_that("getTtest with assay.type returns DataFrame", {
+ result <- getTtest(tse,
+ assay.type = "relabundance",
+ formula = ~Fat
+ )
+
+ expect_s4_class(result, "DataFrame")
+ expect_true("p" %in% names(result))
+ expect_true("p.adj" %in% names(result))
+})
+
+test_that("getTtest with assay.type returns one row per feature", {
+ result <- getTtest(tse,
+ assay.type = "relabundance",
+ formula = ~Fat
+ )
+ expect_equal(nrow(result), nrow(tse))
+})
+
+test_that("getTtest respects features argument", {
+ feat_subset <- rownames(tse)[1:5]
+ result <- getTtest(tse,
+ assay.type = "relabundance",
+ formula = ~Fat, features = feat_subset
+ )
+ expect_equal(nrow(result), 5)
+})
+
+################################################################################
+# col.var tests (colData variable)
+################################################################################
+
+test_that("getTtest with col.var works", {
+ tse_alpha <- mia::addAlpha(tse, index = "shannon_diversity")
+ result <- getTtest(tse_alpha,
+ col.var = "shannon_diversity",
+ formula = ~Fat
+ )
+
+ expect_s4_class(result, "DataFrame")
+ expect_equal(nrow(result), 1) # Single test, not per-feature
+})
+
+################################################################################
+# Validation tests
+################################################################################
+
+test_that("getTtest fails without data source", {
+ expect_error(getTtest(tse, formula = ~Fat), "either")
+})
+
+test_that("getTtest fails with invalid formula", {
+ expect_error(
+ getTtest(tse, assay.type = "relabundance", formula = "not a formula"),
+ "must be a formula"
+ )
+})
+
+test_that("getTtest var.equal parameter works", {
+ result_welch <- getTtest(tse,
+ assay.type = "relabundance",
+ formula = ~Fat, var.equal = FALSE
+ )
+ result_student <- getTtest(tse,
+ assay.type = "relabundance",
+ formula = ~Fat, var.equal = TRUE
+ )
+ expect_true(is.numeric(result_welch$p))
+ expect_true(is.numeric(result_student$p))
+})
+
+################################################################################
+# Robustness tests
+################################################################################
+
+test_that("getTtest handles zero variance with warnings", {
+ # Create a feature with zero variance to trigger error in t-test
+ tse_sparse <- tse
+ assay(tse_sparse, "relabundance")[1, ] <- 1 # All same values
+
+ expect_warning(
+ result <- getTtest(tse_sparse,
+ assay.type = "relabundance",
+ formula = ~Fat
+ ),
+ "failed"
+ )
+ expect_true(any(is.na(result$p)))
+ expect_true("warning" %in% colnames(result))
+})
+
+test_that("getTtest works with un-sorted paired data", {
+ # Create paired data and shuffle it
+ tse_paired <- tse[, 1:10]
+ tse_paired$Subject <- rep(1:5, each = 2)
+ tse_paired$Time <- rep(c("T1", "T2"), 5)
+
+ # Shuffle
+ idx <- sample(ncol(tse_paired))
+ tse_shuffled <- tse_paired[, idx]
+
+ # This should pass because we sort internally by Subject
+ expect_silent(
+ result <- getTtest(tse_shuffled,
+ assay.type = "relabundance",
+ formula = ~Time, pair.by = "Subject"
+ )
+ )
+ expect_s4_class(result, "DataFrame")
+})
+
+test_that("getTtest p.adjust.method parameter works", {
+ result <- getTtest(tse,
+ assay.type = "relabundance",
+ formula = ~Fat, p.adjust.method = "bonferroni"
+ )
+ expect_true("p.adj" %in% names(result))
+})
+
+################################################################################
+# addTtest tests
+################################################################################
+
+test_that("addTtest adds p.adj to rowData", {
+ result <- addTtest(tse,
+ assay.type = "relabundance",
+ formula = ~Fat
+ )
+ expect_true("ttest_padj" %in% colnames(rowData(result)))
+})
diff --git a/tests/testthat/test-getWilcoxon.R b/tests/testthat/test-getWilcoxon.R
new file mode 100644
index 0000000..0c81d7e
--- /dev/null
+++ b/tests/testthat/test-getWilcoxon.R
@@ -0,0 +1,130 @@
+# Tests for getWilcoxon
+
+# Use HintikkaXOData - has good variance for statistical tests
+data(HintikkaXOData, package = "mia")
+mae <- HintikkaXOData
+tse <- MultiAssayExperiment::getWithColData(mae, "microbiota")
+tse <- mia::transformAssay(tse, method = "relabundance")
+tse <- tse[1:10, ]
+
+################################################################################
+# assay.type tests (per-feature)
+################################################################################
+
+test_that("getWilcoxon with assay.type returns DataFrame", {
+ result <- getWilcoxon(tse,
+ assay.type = "relabundance",
+ formula = ~Fat
+ )
+
+ expect_s4_class(result, "DataFrame")
+ expect_true("p" %in% names(result))
+ expect_true("p.adj" %in% names(result))
+})
+
+test_that("getWilcoxon with assay.type returns one row per feature", {
+ result <- getWilcoxon(tse,
+ assay.type = "relabundance",
+ formula = ~Fat
+ )
+ expect_equal(nrow(result), nrow(tse))
+})
+
+test_that("getWilcoxon respects features argument", {
+ feat_subset <- rownames(tse)[1:5]
+ result <- getWilcoxon(tse,
+ assay.type = "relabundance",
+ formula = ~Fat, features = feat_subset
+ )
+ expect_equal(nrow(result), 5)
+})
+
+################################################################################
+# col.var tests (colData variable)
+################################################################################
+
+test_that("getWilcoxon with col.var works", {
+ tse_alpha <- mia::addAlpha(tse, index = "shannon_diversity")
+ result <- getWilcoxon(tse_alpha,
+ col.var = "shannon_diversity",
+ formula = ~Fat
+ )
+
+ expect_s4_class(result, "DataFrame")
+ expect_equal(nrow(result), 1) # Single test, not per-feature
+})
+
+################################################################################
+# Robustness tests
+################################################################################
+
+test_that("getWilcoxon handles sparse data with warnings", {
+ # Create a feature with all NAs to trigger error
+ tse_sparse <- tse
+ assay(tse_sparse, "relabundance")[1, ] <- NA_real_
+
+ expect_warning(
+ result <- getWilcoxon(tse_sparse,
+ assay.type = "relabundance",
+ formula = ~Fat
+ ),
+ "failed"
+ )
+ expect_true(any(is.na(result$p)))
+ expect_true("warning" %in% colnames(result))
+})
+
+test_that("getWilcoxon works with un-sorted paired data", {
+ # Create paired data and shuffle it
+ tse_paired <- tse[, 1:10]
+ tse_paired$Subject <- rep(1:5, each = 2)
+ tse_paired$Time <- rep(c("T1", "T2"), 5)
+
+ # Shuffle
+ idx <- sample(ncol(tse_paired))
+ tse_shuffled <- tse_paired[, idx]
+
+ # This should pass because we sort internally by Subject
+ expect_silent(
+ result <- getWilcoxon(tse_shuffled,
+ assay.type = "relabundance",
+ formula = ~Time, pair.by = "Subject"
+ )
+ )
+ expect_s4_class(result, "DataFrame")
+})
+
+################################################################################
+# Validation tests
+################################################################################
+
+test_that("getWilcoxon fails without data source", {
+ expect_error(getWilcoxon(tse, formula = ~Fat), "either")
+})
+
+test_that("getWilcoxon fails with invalid formula", {
+ expect_error(
+ getWilcoxon(tse, assay.type = "relabundance", formula = "not a formula"),
+ "must be a formula"
+ )
+})
+
+test_that("getWilcoxon p.adjust.method parameter works", {
+ result <- getWilcoxon(tse,
+ assay.type = "relabundance",
+ formula = ~Fat, p.adjust.method = "bonferroni"
+ )
+ expect_true("p.adj" %in% names(result))
+})
+
+################################################################################
+# addWilcoxon tests
+################################################################################
+
+test_that("addWilcoxon adds p.adj to rowData", {
+ result <- addWilcoxon(tse,
+ assay.type = "relabundance",
+ formula = ~Fat
+ )
+ expect_true("wilcoxon_padj" %in% colnames(rowData(result)))
+})
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
new file mode 100644
index 0000000..1c4e2ec
--- /dev/null
+++ b/tests/testthat/test-utils.R
@@ -0,0 +1,149 @@
+# Tests for utils.R functions
+
+# Use HintikkaXOData for reliable tests
+# Need getWithColData to get colData from MAE
+data(HintikkaXOData, package = "mia")
+mae <- HintikkaXOData
+tse <- MultiAssayExperiment::getWithColData(mae, "microbiota")
+tse <- mia::transformAssay(tse, method = "relabundance")
+tse <- tse[1:10, ]
+
+################################################################################
+# .check_input tests
+################################################################################
+
+test_that(".check_input requires exactly one data source", {
+ expect_error(
+ .check_input(tse, NULL, NULL, NULL, ~Fat, NULL, NULL, NULL),
+ "either"
+ )
+ expect_error(
+ .check_input(
+ tse, "relabundance", "var", NULL, ~Fat,
+ NULL, NULL, NULL
+ ),
+ "either"
+ )
+})
+
+test_that(".check_input validates features only with assay.type", {
+ expect_error(
+ .check_input(tse, NULL, NULL, "Fat", ~Fat, NULL, NULL,
+ features = "Taxa1"
+ ),
+ "features"
+ )
+})
+
+test_that(".check_input validates features exist", {
+ expect_error(
+ .check_input(tse, "relabundance", NULL, NULL, ~Fat, NULL, NULL,
+ features = c("nonexistent")
+ ),
+ "features"
+ )
+})
+
+test_that(".check_input validates formula", {
+ expect_error(
+ .check_input(
+ tse, "relabundance", NULL, NULL, "not a formula",
+ NULL, NULL, NULL
+ ),
+ "must be a formula"
+ )
+})
+
+test_that(".check_input validates formula variable exists", {
+ expect_error(
+ .check_input(
+ tse, "relabundance", NULL, NULL, ~nonexistent,
+ NULL, NULL, NULL
+ ),
+ "must be a single character value from the following options"
+ )
+})
+
+test_that(".check_input validates split.by exists", {
+ expect_error(
+ .check_input(
+ tse, "relabundance", NULL, NULL, ~Fat,
+ "nonexistent", NULL, NULL
+ ),
+ "must be a single character value from the following options"
+ )
+})
+
+test_that(".check_input validates pair.by exists", {
+ expect_error(
+ .check_input(
+ tse, "relabundance", NULL, NULL, ~Fat, NULL,
+ "nonexistent", NULL
+ ),
+ "must be a single character value from the following options"
+ )
+})
+
+test_that(".check_input passes with valid inputs", {
+ expect_silent(
+ .check_input(
+ tse, "relabundance", NULL, NULL, ~Fat,
+ NULL, NULL, NULL
+ )
+ )
+})
+
+################################################################################
+# .get_rhs_var tests
+################################################################################
+
+test_that(".get_rhs_var extracts group from formula", {
+ result <- .get_rhs_var(~Fat)
+ expect_equal(result, "Fat")
+})
+
+test_that(".get_rhs_var fails with non-formula", {
+ expect_error(.get_rhs_var("not a formula"), "must be a formula")
+})
+
+################################################################################
+# .check_group tests
+################################################################################
+
+test_that(".check_group validates at least 2 levels", {
+ df <- data.frame(group = c("A"), value = 1)
+ expect_error(.check_group(df, "group"), "at least 2 levels")
+})
+
+test_that(".check_group passes with valid 2-level group", {
+ df <- data.frame(group = c("A", "B"), value = 1:2)
+ expect_silent(.check_group(df, "group"))
+})
+
+################################################################################
+# .get_data tests
+################################################################################
+
+test_that(".get_data with assay.type returns correct format", {
+ df <- .get_data(
+ tse, "relabundance", NULL, NULL, "Fat",
+ NULL, NULL, NULL
+ )
+
+ expect_s3_class(df, "data.frame")
+ expect_true("relabundance" %in% names(df))
+ expect_true("Fat" %in% names(df))
+ expect_true("FeatureID" %in% names(df))
+})
+
+test_that(".get_data with col.var returns correct format", {
+ tse_alpha <- mia::addAlpha(tse, index = "shannon_diversity")
+ df <- .get_data(
+ tse_alpha, NULL, NULL, "shannon_diversity",
+ "Fat", NULL, NULL, NULL
+ )
+
+ expect_s3_class(df, "data.frame")
+ expect_true("shannon_diversity" %in% names(df))
+ expect_true("Fat" %in% names(df))
+})
diff --git a/vignettes/daa.Rmd b/vignettes/daa.Rmd
index 86bb835..1348a11 100644
--- a/vignettes/daa.Rmd
+++ b/vignettes/daa.Rmd
@@ -1,19 +1,9 @@
---
title: "Introduction to daa"
-author:
- - name: Your name
- affiliation:
- - Your institution
- email: your.email@somewhere.com
output:
BiocStyle::html_document:
- self_contained: yes
toc: true
toc_float: true
- toc_depth: 2
- code_folding: show
-date: "`r doc_date()`"
-package: "`r pkg_ver('daa')`"
vignette: >
%\VignetteIndexEntry{Introduction to daa}
%\VignetteEngine{knitr::rmarkdown}
@@ -22,113 +12,85 @@ vignette: >
```{r setup, include = FALSE}
knitr::opts_chunk$set(
- collapse = TRUE,
- comment = "#>",
- crop = NULL ## Related to https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016656.html
+ collapse = TRUE,
+ comment = "#>"
)
```
+# Introduction
-```{r vignetteSetup, echo=FALSE, message=FALSE, warning = FALSE}
-## Bib setup
-library("RefManageR")
-
-## Write bibliography information
-bib <- c(
- R = citation(),
- BiocStyle = citation("BiocStyle")[1],
- knitr = citation("knitr")[1],
- RefManageR = citation("RefManageR")[1],
- rmarkdown = citation("rmarkdown")[1],
- sessioninfo = citation("sessioninfo")[1],
- testthat = citation("testthat")[1],
- daa = citation("daa")[1]
-)
-```
-
-# Basics
-
-## Install `daa`
+The **daa** package provides methods for differential abundance analysis (DAA)
+of microbiome data. It works with `TreeSummarizedExperiment` and
+`SummarizedExperiment` objects from the miaverse ecosystem.
-`R` is an open-source statistical environment which can be easily modified to enhance its functionality via packages. `r Biocpkg("daa")` is a `R` package available via the [Bioconductor](http://bioconductor.org) repository for packages. `R` can be installed on any operating system from [CRAN](https://cran.r-project.org/) after which you can install `r Biocpkg("daa")` by using the following commands in your `R` session:
+## Installation
-```{r "install", eval = FALSE}
+```{r install, eval = FALSE}
if (!requireNamespace("BiocManager", quietly = TRUE)) {
- install.packages("BiocManager")
+ install.packages("BiocManager")
}
BiocManager::install("daa")
-
-## Check that you have a valid Bioconductor installation
-BiocManager::valid()
```
-## Required knowledge
+# Available Methods
-`r Biocpkg("daa")` is based on many other packages and in particular in those that have implemented the infrastructure needed for dealing with RNA-seq data (EDIT!). That is, packages like `r Biocpkg("SummarizedExperiment")` (EDIT!).
+## Wilcoxon rank-sum test
-If you are asking yourself the question "Where do I start using Bioconductor?" you might be interested in [this blog post](http://lcolladotor.github.io/2014/10/16/startbioc/#.VkOKbq6rRuU).
+The `getWilcoxon()` function performs nonparametric Wilcoxon rank-sum tests
+for each feature, comparing two groups.
-## Asking for help
+```{r wilcoxon, message = FALSE}
+library(daa)
+library(mia)
-As package developers, we try to explain clearly how to use our packages and in which order to use the functions. But `R` and `Bioconductor` have a steep learning curve so it is critical to learn where to ask for help. The blog post quoted above mentions some but we would like to highlight the [Bioconductor support site](https://support.bioconductor.org/) as the main resource for getting help: remember to use the `daa` tag and check [the older posts](https://support.bioconductor.org/tag/daa/). Other alternatives are available such as creating GitHub issues and tweeting. However, please note that if you want to receive help you should adhere to the [posting guidelines](http://www.bioconductor.org/help/support/posting-guide/). It is particularly critical that you provide a small reproducible example and your session information so package developers can track down the source of the error.
+# Load example data (HintikkaXOData is used in unit tests)
+data(HintikkaXOData, package = "mia")
+tse <- MultiAssayExperiment::getWithColData(HintikkaXOData, "microbiota")
-## Citing `daa`
+# Standardize assay data
+tse <- transformAssay(tse, method = "relabundance")
-We hope that `r Biocpkg("daa")` will be useful for your research. Please use the following information to cite the package and the overall approach. Thank you!
+# Subset for demonstration
+tse <- tse[1:50, ]
-```{r "citation"}
-## Citation info
-citation("daa")
+# Run Wilcoxon test comparing groups in 'Fat' metadata column
+res <- getWilcoxon(tse, assay.type = "relabundance", formula = ~Fat)
+head(res)
```
-# Quick start to using `daa`
+Use `addWilcoxon()` to store results in `rowData()`:
-```{r "start", message=FALSE}
-library("daa")
+```{r add_wilcoxon}
+tse <- addWilcoxon(tse, assay.type = "relabundance", formula = ~Fat)
+head(rowData(tse)[, "wilcoxon_padj", drop = FALSE])
```
-Edit this as you see fit =)
-
-Here is an example of you can cite your package inside the vignette:
-
-* `r Biocpkg("daa")` `r Citep(bib[["daa"]])`
+## t-test
+The `getTtest()` function performs t-tests (Welch's by default).
-
-# Reproducibility
-
-The `r Biocpkg("daa")` package `r Citep(bib[["daa"]])` was made possible thanks to:
-
-* R `r Citep(bib[["R"]])`
-* `r Biocpkg("BiocStyle")` `r Citep(bib[["BiocStyle"]])`
-* `r CRANpkg("knitr")` `r Citep(bib[["knitr"]])`
-* `r CRANpkg("RefManageR")` `r Citep(bib[["RefManageR"]])`
-* `r CRANpkg("rmarkdown")` `r Citep(bib[["rmarkdown"]])`
-* `r CRANpkg("sessioninfo")` `r Citep(bib[["sessioninfo"]])`
-* `r CRANpkg("testthat")` `r Citep(bib[["testthat"]])`
-
-This package was developed using `r BiocStyle::Biocpkg("biocthis")`.
-
-`R` session information.
-
-```{r reproduce3, echo=FALSE}
-## Session info
-library("sessioninfo")
-options(width = 120)
-session_info()
+```{r ttest}
+res <- getTtest(tse, assay.type = "relabundance", formula = ~Fat)
+head(res)
```
+## Testing metadata variables
+You can also test variables from `colData` or `rowData`. For example, testing
+alpha diversity indices:
-# Bibliography
+```{r metadata}
+# Add alpha diversity to colData
+tse <- mia::addAlpha(tse, index = "shannon_diversity")
-This vignette was generated using `r Biocpkg("BiocStyle")` `r Citep(bib[["BiocStyle"]])`
-with `r CRANpkg("knitr")` `r Citep(bib[["knitr"]])` and `r CRANpkg("rmarkdown")` `r Citep(bib[["rmarkdown"]])` running behind the scenes.
+# Test diversity differences between groups
+res <- getWilcoxon(tse, col.var = "shannon_diversity", formula = ~Fat)
+head(res)
+```
-Citations made with `r CRANpkg("RefManageR")` `r Citep(bib[["RefManageR"]])`.
+# Session Info
-```{r vignetteBiblio, results = "asis", echo = FALSE, warning = FALSE, message = FALSE}
-## Print bibliography
-PrintBibliography(bib, .opts = list(hyperlink = "to.doc", style = "html"))
+```{r session}
+sessionInfo()
```