diff --git a/.Rbuildignore b/.Rbuildignore index 40e9f50..4fc5fa0 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,3 +4,5 @@ ^README\.Rmd$ ^\.github$ ^codecov\.yml$ +^pkgdown$ +^README\.html$ diff --git a/DESCRIPTION b/DESCRIPTION index e961c29..8331b9d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,17 +15,24 @@ BugReports: https://github.com/microbiome/daa/issues/ biocViews: Software, Microbiome Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Depends: - R (>= 4.5.0) + R (>= 4.5.0), + mia Imports: - dplyr + dplyr, + methods, + miaViz, + purrr, + rstatix, + S4Vectors, + SingleCellExperiment, + SummarizedExperiment Suggests: BiocStyle, knitr, - RefManageR, + MultiAssayExperiment, rmarkdown, - sessioninfo, testthat Config/testthat/edition: 3 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 6ae9268..e5bf65f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,2 +1,26 @@ # Generated by roxygen2: do not edit by hand +export(addTtest) +export(addWilcoxon) +export(getTtest) +export(getWilcoxon) +exportMethods(addTtest) +exportMethods(addWilcoxon) +exportMethods(getTtest) +exportMethods(getWilcoxon) +importFrom(S4Vectors,DataFrame) +importFrom(SingleCellExperiment,altExp) +importFrom(SummarizedExperiment,assay) +importFrom(SummarizedExperiment,colData) +importFrom(SummarizedExperiment,rowData) +importFrom(dplyr,across) +importFrom(dplyr,all_of) +importFrom(dplyr,arrange) +importFrom(dplyr,group_split) +importFrom(methods,callNextMethod) +importFrom(mia,meltSE) +importFrom(purrr,map_df) +importFrom(rstatix,adjust_pvalue) +importFrom(rstatix,t_test) +importFrom(rstatix,wilcox_test) +importFrom(stats,as.formula) diff --git a/R/AllGenerics.R b/R/AllGenerics.R new file mode 100644 index 0000000..2661b79 --- /dev/null +++ b/R/AllGenerics.R @@ -0,0 +1,25 @@ +# This file contains all the generics + +#' @rdname getWilcoxon +#' @export +setGeneric( "getWilcoxon", signature = "x", function ( x, ... ){ + standardGeneric("getWilcoxon") +}) + +#' @rdname getWilcoxon +#' @export +setGeneric( "addWilcoxon", signature = "x", function ( x, ... ){ + standardGeneric("addWilcoxon") +}) + +#' @rdname getTtest +#' @export +setGeneric( "getTtest", signature = "x", function ( x, ... ){ + standardGeneric("getTtest") +}) + +#' @rdname getTtest +#' @export +setGeneric( "addTtest", signature = "x", function ( x, ... ){ + standardGeneric("addTtest") +}) diff --git a/R/getTtest.R b/R/getTtest.R new file mode 100644 index 0000000..0455782 --- /dev/null +++ b/R/getTtest.R @@ -0,0 +1,149 @@ +#' @name +#' getTtest +#' +#' @title +#' Perform t-test +#' +#' @description +#' These functions perform t-test to compare values between two groups. +#' +#' @details +#' By default, Welch's t-test is used which does not assume equal variances. +#' Set \code{var.equal = TRUE} for Student's t-test. +#' +#' Specify exactly one of \code{assay.type}, \code{row.var}, or \code{col.var} +#' to define the values to test. +#' +#' @return +#' \code{getTtest} returns a \code{DataFrame} with test results. +#' \code{addTtest} returns \code{x} with results added to \code{rowData(x)}. +#' +#' @param x A \code{SummarizedExperiment} object. +#' +#' @param assay.type \code{Character scalar} or \code{NULL}. Specifies assay to +#' test. Tests are run per feature. (Default: \code{NULL}) +#' +#' @param row.var \code{Character scalar} or \code{NULL}. Specifies variable +#' from \code{rowData(x)} to test. (Default: \code{NULL}) +#' +#' @param col.var \code{Character scalar} or \code{NULL}. Specifies variable +#' from \code{colData(x)} to test. (Default: \code{NULL}) +#' +#' @param formula \code{formula}. A formula specifying the grouping variable, +#' e.g., \code{~ SampleType}. The RHS specifies the comparison groups. +#' For >2 levels, pairwise comparisons are performed. +#' +#' @param split.by \code{Character vector} or \code{NULL}. Columns to split by. +#' Tests are run separately for each combination. (Default: \code{NULL}) +#' +#' @param pair.by \code{Character scalar} or \code{NULL}. Column for pairing +#' samples in paired tests. (Default: \code{NULL}) +#' +#' @param features \code{Character vector} or \code{NULL}. Specific features +#' to test when using \code{assay.type}. (Default: \code{NULL}) +#' +#' @param var.equal \code{Logical scalar}. Assume equal variances? +#' (Default: \code{FALSE}) +#' +#' @param p.adjust.method \code{Character scalar}. Method for p-value +#' adjustment. (Default: \code{"fdr"}) +#' +#' @param name \code{Character scalar}. Column name prefix for results. +#' (Default: \code{"ttest"}) +#' +#' @param ... Additional arguments passed to \code{rstatix::t_test}. +#' +#' @examples +#' data(GlobalPatterns, package = "mia") +#' tse <- GlobalPatterns +#' tse <- tse[1:50, tse$SampleType %in% c("Feces", "Skin")] +#' +#' # Test assay values (per feature) +#' res <- getTtest(tse, assay.type = "counts", formula = ~SampleType) +#' +#' # Test colData variable (e.g., alpha diversity) +#' tse <- mia::addAlpha(tse, index = "shannon_diversity") +#' res <- getTtest(tse, col.var = "shannon_diversity", formula = ~SampleType) +#' +#' @seealso +#' \code{\link[rstatix:t_test]{rstatix::t_test}}, +#' \code{\link[daa:getWilcoxon]{getWilcoxon}} +#' +#' @export +NULL + +#' @rdname getTtest +#' @export +#' @importFrom SingleCellExperiment altExp +#' @importFrom methods callNextMethod +setMethod("getTtest", "SingleCellExperiment", function (x, ... ){ + x <- .check_and_get_altExp(x, ...) + res <- callNextMethod(x, ...) + return(res) +}) + +#' @rdname getTtest +#' @export +#' @importFrom SummarizedExperiment assay colData rowData +#' @importFrom rstatix t_test adjust_pvalue +#' @importFrom S4Vectors DataFrame +setMethod( + "getTtest", "SummarizedExperiment", + function (x, assay.type = NULL, row.var = NULL, col.var = NULL, + formula, split.by = NULL, pair.by = NULL, features = NULL, + var.equal = FALSE, p.adjust.method = "fdr", ... ){ + ############################# Input check ############################## + group <- .check_input( + x, assay.type, row.var, col.var, formula, + split.by, pair.by, features + ) + if( !.is_a_bool(var.equal) ){ + stop("'var.equal' must be TRUE or FALSE.", call. = FALSE) + } + ########################### Input check end ############################ + # Get y variable name (the column to test) + y <- c(assay.type, row.var, col.var) + # Get data based on source + df <- .get_data( + x, assay.type, row.var, col.var, group, + split.by, pair.by, features + ) + # Run tests + paired <- !is.null(pair.by) + res <- .run_ttest( + df, y, group, split.by, paired, var.equal, + p.adjust.method, features, ... + ) + return(res) + } +) + +#' @rdname getTtest +#' @export +setMethod( + "addTtest", "SummarizedExperiment", + function (x, name = "ttest", ... ){ + if( !.is_non_empty_string(name) ){ + stop("'name' must be a single character value.", call. = FALSE) + } + res <- getTtest(x, ...) + x <- .add_values_to_colData(x, list(res$p.adj), paste0(name, "_padj"), + MARGIN = 1 + ) + return(x) + } +) + +################################################################################ +# Internal function using .calc_daa engine +################################################################################ + +#' @importFrom rstatix t_test +.run_ttest <- function (df, y, group, split.by, paired, var.equal, + p.adjust.method, features = NULL, ... ){ + .calc_daa( + df = df, y = y, group = group, split.by = split.by, + paired = paired, FUN = rstatix::t_test, + p.adjust.method = p.adjust.method, features = features, var.equal = var.equal, ... + ) +} diff --git a/R/getWilcoxon.R b/R/getWilcoxon.R new file mode 100644 index 0000000..734248d --- /dev/null +++ b/R/getWilcoxon.R @@ -0,0 +1,144 @@ +#' @name +#' getWilcoxon +#' +#' @title +#' Perform Wilcoxon rank-sum test +#' +#' @description +#' These functions perform Wilcoxon rank-sum test (Mann-Whitney U) to compare +#' values between two groups. +#' +#' @details +#' The Wilcoxon rank-sum test is a non-parametric test used to compare two +#' independent groups. It is suitable when data does not meet normality +#' assumptions. +#' +#' Specify exactly one of \code{assay.type}, \code{row.var}, or \code{col.var} +#' to define the values to test. +#' +#' @return +#' \code{getWilcoxon} returns a \code{DataFrame} with test results. +#' \code{addWilcoxon} returns \code{x} with results added to \code{rowData(x)}. +#' +#' @param x A \code{SummarizedExperiment} object. +#' +#' @param assay.type \code{Character scalar} or \code{NULL}. Specifies assay to +#' test. Tests are run per feature. (Default: \code{NULL}) +#' +#' @param row.var \code{Character scalar} or \code{NULL}. Specifies variable +#' from \code{rowData(x)} to test. (Default: \code{NULL}) +#' +#' @param col.var \code{Character scalar} or \code{NULL}. Specifies variable +#' from \code{colData(x)} to test. (Default: \code{NULL}) +#' +#' @param formula \code{formula}. A formula specifying the grouping variable, +#' e.g., \code{~ SampleType}. The RHS specifies the comparison groups. +#' For >2 levels, pairwise comparisons are performed. +#' +#' @param split.by \code{Character vector} or \code{NULL}. Columns to split by. +#' Tests are run separately for each combination. (Default: \code{NULL}) +#' +#' @param pair.by \code{Character scalar} or \code{NULL}. Column for pairing +#' samples in paired tests. (Default: \code{NULL}) +#' +#' @param features \code{Character vector} or \code{NULL}. Specific features +#' to test when using \code{assay.type}. (Default: \code{NULL}) +#' +#' @param p.adjust.method \code{Character scalar}. Method for p-value +#' adjustment. (Default: \code{"fdr"}) +#' +#' @param name \code{Character scalar}. Column name prefix for results. +#' (Default: \code{"wilcoxon"}) +#' +#' @param ... Additional arguments passed to \code{rstatix::wilcox_test}. +#' +#' @examples +#' data(GlobalPatterns, package = "mia") +#' tse <- GlobalPatterns +#' tse <- tse[1:50, tse$SampleType %in% c("Feces", "Skin")] +#' +#' # Test assay values (per feature) +#' res <- getWilcoxon(tse, assay.type = "counts", formula = ~SampleType) +#' +#' # Test colData variable (e.g., alpha diversity) +#' tse <- mia::addAlpha(tse, index = "shannon_diversity") +#' res <- getWilcoxon(tse, col.var = "shannon_diversity", formula = ~SampleType) +#' +#' @seealso +#' \code{\link[rstatix:wilcox_test]{rstatix::wilcox_test}}, +#' \code{\link[daa:getTtest]{getTtest}} +#' +#' @export +NULL + +#' @rdname getWilcoxon +#' @export +#' @importFrom SingleCellExperiment altExp +#' @importFrom methods callNextMethod +setMethod("getWilcoxon", "SingleCellExperiment", function (x, ... ){ + x <- .check_and_get_altExp(x, ...) + res <- callNextMethod(x, ...) + return(res) +}) + +#' @rdname getWilcoxon +#' @export +#' @importFrom SummarizedExperiment assay colData rowData +#' @importFrom rstatix wilcox_test adjust_pvalue +#' @importFrom S4Vectors DataFrame +setMethod( + "getWilcoxon", "SummarizedExperiment", + function (x, assay.type = NULL, row.var = NULL, col.var = NULL, + formula, split.by = NULL, pair.by = NULL, features = NULL, + p.adjust.method = "fdr", ... ){ + ############################# Input check ############################## + group <- .check_input( + x, assay.type, row.var, col.var, formula, + split.by, pair.by, features + ) + ########################### Input check end ############################ + # Get y variable name (the column to test) + y <- c(assay.type, row.var, col.var) + # Get data based on source + df <- .get_data( + x, assay.type, row.var, col.var, group, + split.by, pair.by, features + ) + # Run tests + paired <- !is.null(pair.by) + res <- .run_wilcoxon( + df, y, group, split.by, paired, + p.adjust.method, features, ... + ) + return(res) + } +) + +#' @rdname getWilcoxon +#' @export +setMethod( + "addWilcoxon", "SummarizedExperiment", + function (x, name = "wilcoxon", ... ){ + if( !.is_non_empty_string(name) ){ + stop("'name' must be a single character value.", call. = FALSE) + } + res <- getWilcoxon(x, ...) + x <- .add_values_to_colData(x, list(res$p.adj), paste0(name, "_padj"), + MARGIN = 1 + ) + return(x) + } +) + +################################################################################ +# Internal function using .calc_daa engine +################################################################################ + +#' @importFrom rstatix wilcox_test +.run_wilcoxon <- function (df, y, group, split.by, paired, p.adjust.method, features = NULL, ... ){ + .calc_daa( + df = df, y = y, group = group, split.by = split.by, + paired = paired, FUN = rstatix::wilcox_test, + p.adjust.method = p.adjust.method, features = features, ... + ) +} diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..4c6f4da --- /dev/null +++ b/R/utils.R @@ -0,0 +1,306 @@ +################################################################################ +# Internal methods imported from mia +################################################################################ + +.is_a_bool <- mia:::.is_a_bool +.is_non_empty_string <- mia:::.is_non_empty_string +.is_non_empty_character <- mia:::.is_non_empty_character +.check_assay_present <- mia:::.check_assay_present +.check_and_get_altExp <- mia:::.check_and_get_altExp +.add_values_to_colData <- mia:::.add_values_to_colData + +################################################################################ +# Internal methods imported from miaViz +################################################################################ + +.check_metadata_variable <- miaViz:::.check_metadata_variable + +################################################################################ +# Input validation - matching plotBoxplot's pattern +################################################################################ + +#' Check inputs for statistical tests +#' @keywords internal +#' @noRd +#' @importFrom SummarizedExperiment colData rowData +.check_input <- function (x, assay.type, row.var, col.var, formula, + split.by, pair.by, features ){ + # Either assay.type, row.var or col.var must be specified + if( sum(c(is.null(assay.type), is.null(row.var), is.null(col.var))) != 2L ){ + stop("Please specify either 'assay.type', 'row.var', or 'col.var'.", + call. = FALSE + ) + } + # features cannot be specified if row.var or col.var is specified + if( is.null(assay.type) && !is.null(features) ){ + stop("'features' can only be specified when 'assay.type' is specified.", + call. = FALSE + ) + } + # As features points to rownames, the TSE must have rownames + if( !is.null(features) && is.null(rownames(x)) ){ + stop("'x' must have rownames.", call. = FALSE) + } + if( !(is.null(features) || + (is.character(features) && all(features %in% rownames(x)))) ){ + stop("'features' must be NULL or character vector specifying rownames.", + call. = FALSE + ) + } + # If assay was specified, check that it is correct + if( !is.null(assay.type) ){ + .check_assay_present(assay.type, x) + } + # Check row.var exists in rowData + if( !is.null(row.var) ){ + if( !.is_non_empty_string(row.var) ){ + stop("'row.var' must be a single character value.", call. = FALSE) + } + if( !row.var %in% colnames(rowData(x)) ){ + stop("'", row.var, "' not found in rowData(x).", call. = FALSE) + } + } + # Check col.var exists in colData + if( !is.null(col.var) ){ + if( !.is_non_empty_string(col.var) ){ + stop("'col.var' must be a single character value.", call. = FALSE) + } + if( !col.var %in% colnames(colData(x)) ){ + stop("'", col.var, "' not found in colData(x).", call. = FALSE) + } + } + # Check formula + group <- .get_rhs_var(formula) + # Check group variable exists in appropriate metadata + .check_metadata_var(x, group, assay.type, row.var, col.var) + # Check split.by variables + if( !is.null(split.by) ){ + if( !is.character(split.by) ){ + stop("'split.by' must be a character vector.", call. = FALSE) + } + for( var in split.by ){ + .check_metadata_var(x, var, assay.type, row.var, col.var) + } + } + # Check pair.by variable + if( !is.null(pair.by) ){ + if( !.is_non_empty_string(pair.by) ){ + stop("'pair.by' must be a single character value.", call. = FALSE) + } + .check_metadata_var(x, pair.by, assay.type, row.var, col.var) + } + # Return group for use in caller + group +} + +#' Check metadata variable exists in appropriate location +#' @keywords internal +#' @noRd +#' @importFrom SummarizedExperiment colData rowData +.check_metadata_var <- function (x, var, assay.type, row.var, col.var ){ + # When row.var is used, check rowData + # When assay.type or col.var is used, check colData + .check_metadata_variable( + tse = x, + var = var, + row = !is.null(row.var), + col = !is.null(assay.type) || !is.null(col.var) + ) + invisible(NULL) +} + +#' Extract RHS variable from formula +#' @keywords internal +#' @noRd +.get_rhs_var <- function (formula ){ + if( !inherits(formula, "formula") ){ + stop("'formula' must be a formula.", call. = FALSE) + } + # Get RHS of formula (handles both ~ group and value ~ group) + rhs <- as.character(formula)[length(as.character(formula))] + if( length(rhs) != 1 || rhs == "" ){ + stop("Formula must specify a grouping variable.", call. = FALSE) + } + rhs +} + +#' Check group has at least 2 levels +#' @keywords internal +#' @noRd +.check_group <- function (df, group ){ + if( !group %in% names(df) ){ + stop("'", group, "' not found in data.", call. = FALSE) + } + vals <- unique(df[[group]]) + vals <- vals[!is.na(vals)] + if( length(vals) < 2 ){ + stop("'group' must have at least 2 levels. Found ", length(vals), ".", + call. = FALSE + ) + } + invisible(NULL) +} + +################################################################################ +# Data extraction - matching plotBoxplot's approach +################################################################################ + +#' Get data for testing based on source +#' @keywords internal +#' @noRd +#' @importFrom SummarizedExperiment colData rowData +#' @importFrom mia meltSE +.get_data <- function (x, assay.type, row.var, col.var, + group, split.by, pair.by, features ){ + # Collect all variable names needed + all_vars <- c(group, split.by, pair.by) + all_vars <- unique(all_vars[!sapply(all_vars, is.null)]) + + # Get data based on source - matching getDA's smart routing pattern + if( !is.null(assay.type) ){ + # Automatically detect which variables are in rowData vs colData + row_vars <- vapply(all_vars, function (v ){ + v %in% colnames(rowData(x)) + }, logical(1L)) + col_vars <- all_vars[!row_vars] + row_vars <- all_vars[row_vars] + # Use meltSE with smart routing + df <- meltSE( + x, + assay.type = assay.type, + add.col = c(col.var, pair.by, col_vars), + add.row = c(row.var, row_vars) + ) + } + # If row.var was specified, get the data from rowData + if( !is.null(row.var) ){ + df <- rowData(x)[, c(row.var, all_vars), drop = FALSE] + } + # If col.var was specified, get the data from colData + if( !is.null(col.var) ){ + df <- colData(x)[, c(col.var, pair.by, all_vars), drop = FALSE] + } + + df <- as.data.frame(df) + attr(df, "pair.by") <- pair.by + .check_group(df, group) + + # Validate dependent variable is numeric + y_var <- if( !is.null(assay.type)) assay.type else if( !is.null(row.var)) row.var else col.var + if( !is.numeric(df[[y_var]]) ){ + stop("The dependent variable must be numeric.", call. = FALSE) + } + + df +} + +################################################################################ +# Universal DAA Engine +################################################################################ + +#' Unified engine for differential abundance analysis +#' @keywords internal +#' @noRd +#' @importFrom rstatix adjust_pvalue +#' @importFrom S4Vectors DataFrame +#' @importFrom dplyr across all_of group_split arrange +#' @importFrom purrr map_df +#' @importFrom stats as.formula +.calc_daa <- function (df, y, group, split.by, paired, FUN, + p.adjust.method = "fdr", features = NULL, ... ){ + # Identify pairing variable + pair.by <- attr(df, "pair.by") + + # Combine grouping vars: FeatureID (from meltSE) + split.by + grouping_vars <- c(split.by) + if( "FeatureID" %in% names(df) ){ + grouping_vars <- c("FeatureID", grouping_vars) + } + + # Build formula: y ~ group + formula <- as.formula(paste0(y, " ~ ", group)) + + # Run tests per group (FeatureID + split.by) + if( length(grouping_vars) > 0 ){ + res <- df |> + group_split(across(all_of(grouping_vars))) |> + map_df(function (sub_df ){ + # Ensure correct alignment for paired tests + if( paired && !is.null(pair.by) ){ + sub_df <- sub_df |> arrange(across(all_of(pair.by))) + } + + # Run test with error handling + test_res <- tryCatch( + { + FUN(sub_df, formula, paired = paired, ...) + }, + error = function (e ){ + # Print warning to terminal + msg <- conditionMessage(e) + warning("Statistical test failed for a feature: ", msg, + call. = FALSE + ) + # Create empty/NA result with error message + out <- data.frame( + .y. = y, + group1 = NA_character_, + group2 = NA_character_, + n1 = NA_integer_, + n2 = NA_integer_, + statistic = NA_real_, + p = NA_real_, + warning = msg + ) + return(out) + } + ) + + # Attach grouping info + group_info <- sub_df[1, grouping_vars, drop = FALSE] + res_with_groups <- cbind(test_res, group_info) + return(res_with_groups) + }) + } else { + # Single test (no FeatureID/split.by) + if( paired && !is.null(pair.by) ){ + df <- df |> arrange(across(all_of(pair.by))) + } + + res <- tryCatch( + { + FUN(df, formula, paired = paired, ...) + }, + error = function (e ){ + msg <- conditionMessage(e) + warning("Statistical test failed: ", msg, call. = FALSE) + out <- data.frame( + .y. = y, + group1 = NA_character_, + group2 = NA_character_, + n1 = NA_integer_, + n2 = NA_integer_, + statistic = NA_real_, + p = NA_real_, + warning = msg + ) + return(out) + } + ) + } + + # P-value adjustment + if( "p" %in% colnames(res) && any(!is.na(res$p)) ){ + res <- res |> adjust_pvalue(method = p.adjust.method) + } else { + res$p.adj <- NA_real_ + } + + # Subset to relevant features AFTER statistical calculations + if( !is.null(features) && "FeatureID" %in% colnames(res) ){ + res <- res[res$FeatureID %in% features, , drop = FALSE] + } + + res <- DataFrame(res) + return(res) +} diff --git a/README.Rmd b/README.Rmd index 5f5b812..6e5340d 100644 --- a/README.Rmd +++ b/README.Rmd @@ -13,31 +13,31 @@ knitr::opts_chunk$set( ) ``` -# daa +# daa - Differential Abundance Analysis -[![GitHub issues](https://img.shields.io/github/issues/microbiome/daa)](https://github.com/microbiome/daa/issues) -[![GitHub pulls](https://img.shields.io/github/issues-pr/microbiome/daa)](https://github.com/microbiome/daa/pulls) [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) -[![Bioc release status](http://www.bioconductor.org/shields/build/release/bioc/daa.svg)](https://bioconductor.org/checkResults/release/bioc-LATEST/daa) -[![Bioc devel status](http://www.bioconductor.org/shields/build/devel/bioc/daa.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/daa) -[![Bioc downloads rank](https://bioconductor.org/shields/downloads/release/daa.svg)](http://bioconductor.org/packages/stats/bioc/daa/) -[![Bioc support](https://bioconductor.org/shields/posts/daa.svg)](https://support.bioconductor.org/tag/daa) -[![Bioc history](https://bioconductor.org/shields/years-in-bioc/daa.svg)](https://bioconductor.org/packages/release/bioc/html/daa.html#since) -[![Bioc last commit](https://bioconductor.org/shields/lastcommit/devel/bioc/daa.svg)](http://bioconductor.org/checkResults/devel/bioc-LATEST/daa/) -[![Bioc dependencies](https://bioconductor.org/shields/dependencies/release/daa.svg)](https://bioconductor.org/packages/release/bioc/html/daa.html#since) -[![check-bioc](https://github.com/microbiome/daa/actions/workflows/check-bioc.yml/badge.svg)](https://github.com/microbiome/daa/actions/workflows/check-bioc.yml) +[![Bioc-release](http://bioconductor.org/shields/build/release/bioc/daa.svg)](http://bioconductor.org/packages/release/bioc/html/daa.html) [![Codecov test coverage](https://codecov.io/gh/microbiome/daa/graph/badge.svg)](https://app.codecov.io/gh/microbiome/daa) -The goal of `daa` is to provide method for differential abundance analysis -(DAA). +## Using the package -## Installation instructions +This package provides functions for differential abundance analysis (DAA) +of microbiome data. It works with `TreeSummarizedExperiment` and +`SummarizedExperiment` objects from the miaverse ecosystem. -Get the latest stable `R` release from [CRAN](http://cran.r-project.org/). Then -install `daa` from [Bioconductor](http://bioconductor.org/) using the following -code: +**Available methods:** + +- `getWilcoxon()` / `addWilcoxon()` - Wilcoxon rank-sum test +- `getTtest()` / `addTtest()` - t-test (Welch's or Student's) + +For more information, see the [function reference](https://microbiome.github.io/daa/reference/index.html) +and the [Orchestrating Microbiome Analysis (OMA)](https://microbiome.github.io/OMA) online book. + +## Installation + +### Bioc-release ```{r 'install', eval = FALSE} if (!requireNamespace("BiocManager", quietly = TRUE)) { @@ -47,65 +47,24 @@ if (!requireNamespace("BiocManager", quietly = TRUE)) { BiocManager::install("daa") ``` -And the development version from [GitHub](https://github.com/microbiome/daa) -with: +### Bioc-devel ```{r 'install_dev', eval = FALSE} -BiocManager::install("microbiome/daa") -``` - -## Example - -This is a basic example which shows you how to solve a common problem: - -```{r example, eval = requireNamespace('daa')} -library("daa") -## basic example code -``` - -What is special about using `README.Rmd` instead of just `README.md`? You can include R chunks like so: - -```{r cars} -summary(cars) -``` - -You'll still need to render `README.Rmd` regularly, to keep `README.md` up-to-date. - -You can also embed plots, for example: +if (!requireNamespace("BiocManager", quietly = TRUE)) { + install.packages("BiocManager") +} -```{r pressure, echo = FALSE} -plot(pressure) +BiocManager::install(version = "devel") +BiocManager::install("daa") ``` -In that case, don't forget to commit and push the resulting figure files, so they display on GitHub! - -## Citation +## Contributing -Below is the citation output from using `citation('daa')` in R. Please -run this yourself to check for any updates on how to cite __daa__. - -```{r 'citation', eval = requireNamespace('daa')} -print(citation("daa"), bibtex = TRUE) -``` - -Please note that the `daa` was only made possible thanks to many other R and -bioinformatics software authors, which are cited either in the vignettes and/or -the paper(s) describing this package. +Contributions are welcome in the form of feedback, issues, and pull requests. +See [contributor guidelines](CONTRIBUTING.md). ## Code of Conduct -Please note that the `daa` project is released with a -[Contributor Code of Conduct](http://bioconductor.org/about/code-of-conduct/) +Please note that the daa project is released with a +[Contributor Code of Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. - -## Development tools - -* Continuous code testing is possible thanks to [GitHub actions](https://www.tidyverse.org/blog/2020/04/usethis-1-6-0/) through `r BiocStyle::CRANpkg('usethis')`, `r BiocStyle::CRANpkg('remotes')`, and `r BiocStyle::CRANpkg('rcmdcheck')` customized to use [Bioconductor's docker containers](https://www.bioconductor.org/help/docker/) and `r BiocStyle::Biocpkg('BiocCheck')`. -* Code coverage assessment is possible thanks to [codecov](https://codecov.io/gh) and `r BiocStyle::CRANpkg('covr')`. -* The [documentation website](http://microbiome.github.io/daa) is automatically updated thanks to `r BiocStyle::CRANpkg('pkgdown')`. -* The code is styled automatically thanks to `r BiocStyle::CRANpkg('styler')`. -* The documentation is formatted thanks to `r BiocStyle::CRANpkg('devtools')` and `r BiocStyle::CRANpkg('roxygen2')`. - -For more details, check the `dev` directory. - -This package was developed using `r BiocStyle::Biocpkg('biocthis')`. diff --git a/README.md b/README.md index 6bfef92..b4bc61f 100644 --- a/README.md +++ b/README.md @@ -1,43 +1,36 @@ -# daa +# daa - Differential Abundance Analysis -[![GitHub -issues](https://img.shields.io/github/issues/microbiome/daa)](https://github.com/microbiome/daa/issues) -[![GitHub -pulls](https://img.shields.io/github/issues-pr/microbiome/daa)](https://github.com/microbiome/daa/pulls) [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) -[![Bioc release -status](http://www.bioconductor.org/shields/build/release/bioc/daa.svg)](https://bioconductor.org/checkResults/release/bioc-LATEST/daa) -[![Bioc devel -status](http://www.bioconductor.org/shields/build/devel/bioc/daa.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/daa) -[![Bioc downloads -rank](https://bioconductor.org/shields/downloads/release/daa.svg)](http://bioconductor.org/packages/stats/bioc/daa/) -[![Bioc -support](https://bioconductor.org/shields/posts/daa.svg)](https://support.bioconductor.org/tag/daa) -[![Bioc -history](https://bioconductor.org/shields/years-in-bioc/daa.svg)](https://bioconductor.org/packages/release/bioc/html/daa.html#since) -[![Bioc last -commit](https://bioconductor.org/shields/lastcommit/devel/bioc/daa.svg)](http://bioconductor.org/checkResults/devel/bioc-LATEST/daa/) -[![Bioc -dependencies](https://bioconductor.org/shields/dependencies/release/daa.svg)](https://bioconductor.org/packages/release/bioc/html/daa.html#since) -[![check-bioc](https://github.com/microbiome/daa/actions/workflows/check-bioc.yml/badge.svg)](https://github.com/microbiome/daa/actions/workflows/check-bioc.yml) +[![Bioc-release](http://bioconductor.org/shields/build/release/bioc/daa.svg)](http://bioconductor.org/packages/release/bioc/html/daa.html) [![Codecov test coverage](https://codecov.io/gh/microbiome/daa/graph/badge.svg)](https://app.codecov.io/gh/microbiome/daa) -The goal of `daa` is to provide method for differential abundance -analysis (DAA). +## Using the package -## Installation instructions +This package provides functions for differential abundance analysis +(DAA) of microbiome data. It works with `TreeSummarizedExperiment` and +`SummarizedExperiment` objects from the miaverse ecosystem. -Get the latest stable `R` release from -[CRAN](http://cran.r-project.org/). Then install `daa` from -[Bioconductor](http://bioconductor.org/) using the following code: +**Available methods:** + +- `getWilcoxon()` / `addWilcoxon()` - Wilcoxon rank-sum test +- `getTtest()` / `addTtest()` - t-test (Welch’s or Student’s) + +For more information, see the [function +reference](https://microbiome.github.io/daa/reference/index.html) and +the [Orchestrating Microbiome Analysis +(OMA)](https://microbiome.github.io/OMA) online book. + +## Installation + +### Bioc-release ``` r if (!requireNamespace("BiocManager", quietly = TRUE)) { @@ -47,103 +40,24 @@ if (!requireNamespace("BiocManager", quietly = TRUE)) { BiocManager::install("daa") ``` -And the development version from -[GitHub](https://github.com/microbiome/daa) with: - -``` r -BiocManager::install("microbiome/daa") -``` - -## Example - -This is a basic example which shows you how to solve a common problem: +### Bioc-devel ``` r -library("daa") -## basic example code -``` - -What is special about using `README.Rmd` instead of just `README.md`? -You can include R chunks like so: +if (!requireNamespace("BiocManager", quietly = TRUE)) { + install.packages("BiocManager") +} -``` r -summary(cars) -#> speed dist -#> Min. : 4.0 Min. : 2.00 -#> 1st Qu.:12.0 1st Qu.: 26.00 -#> Median :15.0 Median : 36.00 -#> Mean :15.4 Mean : 42.98 -#> 3rd Qu.:19.0 3rd Qu.: 56.00 -#> Max. :25.0 Max. :120.00 +BiocManager::install(version = "devel") +BiocManager::install("daa") ``` -You’ll still need to render `README.Rmd` regularly, to keep `README.md` -up-to-date. - -You can also embed plots, for example: - - - -In that case, don’t forget to commit and push the resulting figure -files, so they display on GitHub! - -## Citation - -Below is the citation output from using `citation('daa')` in R. Please -run this yourself to check for any updates on how to cite **daa**. - -``` r -print(citation("daa"), bibtex = TRUE) -#> To cite package 'daa' in publications use: -#> -#> Borman T, Lahti L, Muluh M (2025). _daa: Differential abundance -#> analysis_. R package version 0.99.0, -#> . -#> -#> A BibTeX entry for LaTeX users is -#> -#> @Manual{, -#> title = {daa: Differential abundance analysis}, -#> author = {Tuomas Borman and Leo Lahti and Muluh Muluh}, -#> year = {2025}, -#> note = {R package version 0.99.0}, -#> url = {https://github.com/microbiome/daa}, -#> } -``` +## Contributing -Please note that the `daa` was only made possible thanks to many other R -and bioinformatics software authors, which are cited either in the -vignettes and/or the paper(s) describing this package. +Contributions are welcome in the form of feedback, issues, and pull +requests. See [contributor guidelines](CONTRIBUTING.md). ## Code of Conduct -Please note that the `daa` project is released with a [Contributor Code -of Conduct](http://bioconductor.org/about/code-of-conduct/) By -contributing to this project, you agree to abide by its terms. - -## Development tools - -- Continuous code testing is possible thanks to [GitHub - actions](https://www.tidyverse.org/blog/2020/04/usethis-1-6-0/) - through *[usethis](https://CRAN.R-project.org/package=usethis)*, - *[remotes](https://CRAN.R-project.org/package=remotes)*, and - *[rcmdcheck](https://CRAN.R-project.org/package=rcmdcheck)* customized - to use [Bioconductor’s docker - containers](https://www.bioconductor.org/help/docker/) and - *[BiocCheck](https://bioconductor.org/packages/3.22/BiocCheck)*. -- Code coverage assessment is possible thanks to - [codecov](https://codecov.io/gh) and - *[covr](https://CRAN.R-project.org/package=covr)*. -- The [documentation website](http://microbiome.github.io/daa) is - automatically updated thanks to - *[pkgdown](https://CRAN.R-project.org/package=pkgdown)*. -- The code is styled automatically thanks to - *[styler](https://CRAN.R-project.org/package=styler)*. -- The documentation is formatted thanks to - *[devtools](https://CRAN.R-project.org/package=devtools)* and - *[roxygen2](https://CRAN.R-project.org/package=roxygen2)*. - -For more details, check the `dev` directory. - -This package was developed using -*[biocthis](https://bioconductor.org/packages/3.22/biocthis)*. +Please note that the daa project is released with a [Contributor Code of +Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html). +By contributing to this project, you agree to abide by its terms. diff --git a/man/figures/README-pressure-1.png b/man/figures/README-pressure-1.png deleted file mode 100644 index e208311..0000000 Binary files a/man/figures/README-pressure-1.png and /dev/null differ diff --git a/man/getTtest.Rd b/man/getTtest.Rd new file mode 100644 index 0000000..1553598 --- /dev/null +++ b/man/getTtest.Rd @@ -0,0 +1,99 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllGenerics.R, R/getTtest.R +\name{getTtest} +\alias{getTtest} +\alias{addTtest} +\alias{getTtest,SingleCellExperiment-method} +\alias{getTtest,SummarizedExperiment-method} +\alias{addTtest,SummarizedExperiment-method} +\title{Perform t-test} +\usage{ +getTtest(x, ...) + +addTtest(x, ...) + +\S4method{getTtest}{SingleCellExperiment}(x, ...) + +\S4method{getTtest}{SummarizedExperiment}( + x, + assay.type = NULL, + row.var = NULL, + col.var = NULL, + formula, + split.by = NULL, + pair.by = NULL, + features = NULL, + var.equal = FALSE, + p.adjust.method = "fdr", + ... +) + +\S4method{addTtest}{SummarizedExperiment}(x, name = "ttest", ...) +} +\arguments{ +\item{x}{A \code{SummarizedExperiment} object.} + +\item{...}{Additional arguments passed to \code{rstatix::t_test}.} + +\item{assay.type}{\code{Character scalar} or \code{NULL}. Specifies assay to +test. Tests are run per feature. (Default: \code{NULL} )} + +\item{row.var}{\code{Character scalar} or \code{NULL}. Specifies variable +from \code{rowData(x)} to test. (Default: \code{NULL} )} + +\item{col.var}{\code{Character scalar} or \code{NULL}. Specifies variable +from \code{colData(x)} to test. (Default: \code{NULL} )} + +\item{formula}{\code{formula}. A formula specifying the grouping variable, +e.g., \code{~ SampleType}. The RHS specifies the comparison groups. +For >2 levels, pairwise comparisons are performed.} + +\item{split.by}{\code{Character vector} or \code{NULL}. Columns to split by. +Tests are run separately for each combination. (Default: \code{NULL} )} + +\item{pair.by}{\code{Character scalar} or \code{NULL}. Column for pairing +samples in paired tests. (Default: \code{NULL} )} + +\item{features}{\code{Character vector} or \code{NULL}. Specific features +to test when using \code{assay.type}. (Default: \code{NULL} )} + +\item{var.equal}{\code{Logical scalar}. Assume equal variances? +(Default: \code{FALSE} )} + +\item{p.adjust.method}{\code{Character scalar}. Method for p-value +adjustment. (Default: \code{"fdr"} )} + +\item{name}{\code{Character scalar}. Column name prefix for results. +(Default: \code{"ttest"} )} +} +\value{ +\code{getTtest} returns a \code{DataFrame} with test results. +\code{addTtest} returns \code{x} with results added to \code{rowData(x)}. +} +\description{ +These functions perform t-test to compare values between two groups. +} +\details{ +By default, Welch's t-test is used which does not assume equal variances. +Set \code{var.equal = TRUE} for Student's t-test. + +Specify exactly one of \code{assay.type}, \code{row.var}, or \code{col.var} +to define the values to test. +} +\examples{ +data(GlobalPatterns, package = "mia") +tse <- GlobalPatterns +tse <- tse[1:50, tse$SampleType \%in\% c("Feces", "Skin")] + +# Test assay values (per feature) +res <- getTtest(tse, assay.type = "counts", formula = ~SampleType) + +# Test colData variable (e.g., alpha diversity) +tse <- mia::addAlpha(tse, index = "shannon_diversity") +res <- getTtest(tse, col.var = "shannon_diversity", formula = ~SampleType) + +} +\seealso{ +\code{\link[rstatix:t_test]{rstatix::t_test}}, +\code{\link[daa:getWilcoxon]{getWilcoxon}} +} diff --git a/man/getWilcoxon.Rd b/man/getWilcoxon.Rd new file mode 100644 index 0000000..964dd32 --- /dev/null +++ b/man/getWilcoxon.Rd @@ -0,0 +1,97 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllGenerics.R, R/getWilcoxon.R +\name{getWilcoxon} +\alias{getWilcoxon} +\alias{addWilcoxon} +\alias{getWilcoxon,SingleCellExperiment-method} +\alias{getWilcoxon,SummarizedExperiment-method} +\alias{addWilcoxon,SummarizedExperiment-method} +\title{Perform Wilcoxon rank-sum test} +\usage{ +getWilcoxon(x, ...) + +addWilcoxon(x, ...) + +\S4method{getWilcoxon}{SingleCellExperiment}(x, ...) + +\S4method{getWilcoxon}{SummarizedExperiment}( + x, + assay.type = NULL, + row.var = NULL, + col.var = NULL, + formula, + split.by = NULL, + pair.by = NULL, + features = NULL, + p.adjust.method = "fdr", + ... +) + +\S4method{addWilcoxon}{SummarizedExperiment}(x, name = "wilcoxon", ...) +} +\arguments{ +\item{x}{A \code{SummarizedExperiment} object.} + +\item{...}{Additional arguments passed to \code{rstatix::wilcox_test}.} + +\item{assay.type}{\code{Character scalar} or \code{NULL}. Specifies assay to +test. Tests are run per feature. (Default: \code{NULL} )} + +\item{row.var}{\code{Character scalar} or \code{NULL}. Specifies variable +from \code{rowData(x)} to test. (Default: \code{NULL} )} + +\item{col.var}{\code{Character scalar} or \code{NULL}. Specifies variable +from \code{colData(x)} to test. (Default: \code{NULL} )} + +\item{formula}{\code{formula}. A formula specifying the grouping variable, +e.g., \code{~ SampleType}. The RHS specifies the comparison groups. +For >2 levels, pairwise comparisons are performed.} + +\item{split.by}{\code{Character vector} or \code{NULL}. Columns to split by. +Tests are run separately for each combination. (Default: \code{NULL} )} + +\item{pair.by}{\code{Character scalar} or \code{NULL}. Column for pairing +samples in paired tests. (Default: \code{NULL} )} + +\item{features}{\code{Character vector} or \code{NULL}. Specific features +to test when using \code{assay.type}. (Default: \code{NULL} )} + +\item{p.adjust.method}{\code{Character scalar}. Method for p-value +adjustment. (Default: \code{"fdr"} )} + +\item{name}{\code{Character scalar}. Column name prefix for results. +(Default: \code{"wilcoxon"} )} +} +\value{ +\code{getWilcoxon} returns a \code{DataFrame} with test results. +\code{addWilcoxon} returns \code{x} with results added to \code{rowData(x)}. +} +\description{ +These functions perform Wilcoxon rank-sum test (Mann-Whitney U) to compare +values between two groups. +} +\details{ +The Wilcoxon rank-sum test is a non-parametric test used to compare two +independent groups. It is suitable when data does not meet normality +assumptions. + +Specify exactly one of \code{assay.type}, \code{row.var}, or \code{col.var} +to define the values to test. +} +\examples{ +data(GlobalPatterns, package = "mia") +tse <- GlobalPatterns +tse <- tse[1:50, tse$SampleType \%in\% c("Feces", "Skin")] + +# Test assay values (per feature) +res <- getWilcoxon(tse, assay.type = "counts", formula = ~SampleType) + +# Test colData variable (e.g., alpha diversity) +tse <- mia::addAlpha(tse, index = "shannon_diversity") +res <- getWilcoxon(tse, col.var = "shannon_diversity", formula = ~SampleType) + +} +\seealso{ +\code{\link[rstatix:wilcox_test]{rstatix::wilcox_test}}, +\code{\link[daa:getTtest]{getTtest}} +} diff --git a/tests/testthat.R b/tests/testthat.R index 54620d4..f2759bb 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,10 +1,9 @@ # This file is part of the standard setup for testthat. # It is recommended that you do not modify it. # -# Where should you do additional test configuration? -# Learn more about the roles of various files in: -# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview -# * https://testthat.r-lib.org/articles/special-files.html +# Where should you do your testing? +# Learn more about the recommended testthat workflow here: +# https://r-pkgs.org/testing-design.html#sec-tests-files-overview library(testthat) library(daa) diff --git a/tests/testthat/test-example_test.R b/tests/testthat/test-example_test.R deleted file mode 100644 index 94b301e..0000000 --- a/tests/testthat/test-example_test.R +++ /dev/null @@ -1,3 +0,0 @@ -test_that("multiplication works", { - expect_equal(2 * 2, 4) -}) diff --git a/tests/testthat/test-getTtest.R b/tests/testthat/test-getTtest.R new file mode 100644 index 0000000..c2e7b7f --- /dev/null +++ b/tests/testthat/test-getTtest.R @@ -0,0 +1,144 @@ +# Tests for getTtest + +# Use HintikkaXOData - has good variance for statistical tests +# Need getWithColData to get colData from MAE +data(HintikkaXOData, package = "mia") +mae <- HintikkaXOData +tse <- MultiAssayExperiment::getWithColData(mae, "microbiota") +tse <- mia::transformAssay(tse, method = "relabundance") +tse <- tse[1:10, ] + +################################################################################ +# assay.type tests (per-feature) +################################################################################ + +test_that("getTtest with assay.type returns DataFrame", { + result <- getTtest(tse, + assay.type = "relabundance", + formula = ~Fat + ) + + expect_s4_class(result, "DataFrame") + expect_true("p" %in% names(result)) + expect_true("p.adj" %in% names(result)) +}) + +test_that("getTtest with assay.type returns one row per feature", { + result <- getTtest(tse, + assay.type = "relabundance", + formula = ~Fat + ) + expect_equal(nrow(result), nrow(tse)) +}) + +test_that("getTtest respects features argument", { + feat_subset <- rownames(tse)[1:5] + result <- getTtest(tse, + assay.type = "relabundance", + formula = ~Fat, features = feat_subset + ) + expect_equal(nrow(result), 5) +}) + +################################################################################ +# col.var tests (colData variable) +################################################################################ + +test_that("getTtest with col.var works", { + tse_alpha <- mia::addAlpha(tse, index = "shannon_diversity") + result <- getTtest(tse_alpha, + col.var = "shannon_diversity", + formula = ~Fat + ) + + expect_s4_class(result, "DataFrame") + expect_equal(nrow(result), 1) # Single test, not per-feature +}) + +################################################################################ +# Validation tests +################################################################################ + +test_that("getTtest fails without data source", { + expect_error(getTtest(tse, formula = ~Fat), "either") +}) + +test_that("getTtest fails with invalid formula", { + expect_error( + getTtest(tse, assay.type = "relabundance", formula = "not a formula"), + "must be a formula" + ) +}) + +test_that("getTtest var.equal parameter works", { + result_welch <- getTtest(tse, + assay.type = "relabundance", + formula = ~Fat, var.equal = FALSE + ) + result_student <- getTtest(tse, + assay.type = "relabundance", + formula = ~Fat, var.equal = TRUE + ) + expect_true(is.numeric(result_welch$p)) + expect_true(is.numeric(result_student$p)) +}) + +################################################################################ +# Robustness tests +################################################################################ + +test_that("getTtest handles zero variance with warnings", { + # Create a feature with zero variance to trigger error in t-test + tse_sparse <- tse + assay(tse_sparse, "relabundance")[1, ] <- 1 # All same values + + expect_warning( + result <- getTtest(tse_sparse, + assay.type = "relabundance", + formula = ~Fat + ), + "failed" + ) + expect_true(any(is.na(result$p))) + expect_true("warning" %in% colnames(result)) +}) + +test_that("getTtest works with un-sorted paired data", { + # Create paired data and shuffle it + tse_paired <- tse[, 1:10] + tse_paired$Subject <- rep(1:5, each = 2) + tse_paired$Time <- rep(c("T1", "T2"), 5) + + # Shuffle + idx <- sample(ncol(tse_paired)) + tse_shuffled <- tse_paired[, idx] + + # This should pass because we sort internally by Subject + expect_silent( + result <- getTtest(tse_shuffled, + assay.type = "relabundance", + formula = ~Time, pair.by = "Subject" + ) + ) + expect_s4_class(result, "DataFrame") +}) + +test_that("getTtest p.adjust.method parameter works", { + result <- getTtest(tse, + assay.type = "relabundance", + formula = ~Fat, p.adjust.method = "bonferroni" + ) + expect_true("p.adj" %in% names(result)) +}) + +################################################################################ +# addTtest tests +################################################################################ + +test_that("addTtest adds p.adj to rowData", { + result <- addTtest(tse, + assay.type = "relabundance", + formula = ~Fat + ) + expect_true("ttest_padj" %in% colnames(rowData(result))) +}) diff --git a/tests/testthat/test-getWilcoxon.R b/tests/testthat/test-getWilcoxon.R new file mode 100644 index 0000000..0c81d7e --- /dev/null +++ b/tests/testthat/test-getWilcoxon.R @@ -0,0 +1,130 @@ +# Tests for getWilcoxon + +# Use HintikkaXOData - has good variance for statistical tests +data(HintikkaXOData, package = "mia") +mae <- HintikkaXOData +tse <- MultiAssayExperiment::getWithColData(mae, "microbiota") +tse <- mia::transformAssay(tse, method = "relabundance") +tse <- tse[1:10, ] + +################################################################################ +# assay.type tests (per-feature) +################################################################################ + +test_that("getWilcoxon with assay.type returns DataFrame", { + result <- getWilcoxon(tse, + assay.type = "relabundance", + formula = ~Fat + ) + + expect_s4_class(result, "DataFrame") + expect_true("p" %in% names(result)) + expect_true("p.adj" %in% names(result)) +}) + +test_that("getWilcoxon with assay.type returns one row per feature", { + result <- getWilcoxon(tse, + assay.type = "relabundance", + formula = ~Fat + ) + expect_equal(nrow(result), nrow(tse)) +}) + +test_that("getWilcoxon respects features argument", { + feat_subset <- rownames(tse)[1:5] + result <- getWilcoxon(tse, + assay.type = "relabundance", + formula = ~Fat, features = feat_subset + ) + expect_equal(nrow(result), 5) +}) + +################################################################################ +# col.var tests (colData variable) +################################################################################ + +test_that("getWilcoxon with col.var works", { + tse_alpha <- mia::addAlpha(tse, index = "shannon_diversity") + result <- getWilcoxon(tse_alpha, + col.var = "shannon_diversity", + formula = ~Fat + ) + + expect_s4_class(result, "DataFrame") + expect_equal(nrow(result), 1) # Single test, not per-feature +}) + +################################################################################ +# Robustness tests +################################################################################ + +test_that("getWilcoxon handles sparse data with warnings", { + # Create a feature with all NAs to trigger error + tse_sparse <- tse + assay(tse_sparse, "relabundance")[1, ] <- NA_real_ + + expect_warning( + result <- getWilcoxon(tse_sparse, + assay.type = "relabundance", + formula = ~Fat + ), + "failed" + ) + expect_true(any(is.na(result$p))) + expect_true("warning" %in% colnames(result)) +}) + +test_that("getWilcoxon works with un-sorted paired data", { + # Create paired data and shuffle it + tse_paired <- tse[, 1:10] + tse_paired$Subject <- rep(1:5, each = 2) + tse_paired$Time <- rep(c("T1", "T2"), 5) + + # Shuffle + idx <- sample(ncol(tse_paired)) + tse_shuffled <- tse_paired[, idx] + + # This should pass because we sort internally by Subject + expect_silent( + result <- getWilcoxon(tse_shuffled, + assay.type = "relabundance", + formula = ~Time, pair.by = "Subject" + ) + ) + expect_s4_class(result, "DataFrame") +}) + +################################################################################ +# Validation tests +################################################################################ + +test_that("getWilcoxon fails without data source", { + expect_error(getWilcoxon(tse, formula = ~Fat), "either") +}) + +test_that("getWilcoxon fails with invalid formula", { + expect_error( + getWilcoxon(tse, assay.type = "relabundance", formula = "not a formula"), + "must be a formula" + ) +}) + +test_that("getWilcoxon p.adjust.method parameter works", { + result <- getWilcoxon(tse, + assay.type = "relabundance", + formula = ~Fat, p.adjust.method = "bonferroni" + ) + expect_true("p.adj" %in% names(result)) +}) + +################################################################################ +# addWilcoxon tests +################################################################################ + +test_that("addWilcoxon adds p.adj to rowData", { + result <- addWilcoxon(tse, + assay.type = "relabundance", + formula = ~Fat + ) + expect_true("wilcoxon_padj" %in% colnames(rowData(result))) +}) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R new file mode 100644 index 0000000..1c4e2ec --- /dev/null +++ b/tests/testthat/test-utils.R @@ -0,0 +1,149 @@ +# Tests for utils.R functions + +# Use HintikkaXOData for reliable tests +# Need getWithColData to get colData from MAE +data(HintikkaXOData, package = "mia") +mae <- HintikkaXOData +tse <- MultiAssayExperiment::getWithColData(mae, "microbiota") +tse <- mia::transformAssay(tse, method = "relabundance") +tse <- tse[1:10, ] + +################################################################################ +# .check_input tests +################################################################################ + +test_that(".check_input requires exactly one data source", { + expect_error( + .check_input(tse, NULL, NULL, NULL, ~Fat, NULL, NULL, NULL), + "either" + ) + expect_error( + .check_input( + tse, "relabundance", "var", NULL, ~Fat, + NULL, NULL, NULL + ), + "either" + ) +}) + +test_that(".check_input validates features only with assay.type", { + expect_error( + .check_input(tse, NULL, NULL, "Fat", ~Fat, NULL, NULL, + features = "Taxa1" + ), + "features" + ) +}) + +test_that(".check_input validates features exist", { + expect_error( + .check_input(tse, "relabundance", NULL, NULL, ~Fat, NULL, NULL, + features = c("nonexistent") + ), + "features" + ) +}) + +test_that(".check_input validates formula", { + expect_error( + .check_input( + tse, "relabundance", NULL, NULL, "not a formula", + NULL, NULL, NULL + ), + "must be a formula" + ) +}) + +test_that(".check_input validates formula variable exists", { + expect_error( + .check_input( + tse, "relabundance", NULL, NULL, ~nonexistent, + NULL, NULL, NULL + ), + "must be a single character value from the following options" + ) +}) + +test_that(".check_input validates split.by exists", { + expect_error( + .check_input( + tse, "relabundance", NULL, NULL, ~Fat, + "nonexistent", NULL, NULL + ), + "must be a single character value from the following options" + ) +}) + +test_that(".check_input validates pair.by exists", { + expect_error( + .check_input( + tse, "relabundance", NULL, NULL, ~Fat, NULL, + "nonexistent", NULL + ), + "must be a single character value from the following options" + ) +}) + +test_that(".check_input passes with valid inputs", { + expect_silent( + .check_input( + tse, "relabundance", NULL, NULL, ~Fat, + NULL, NULL, NULL + ) + ) +}) + +################################################################################ +# .get_rhs_var tests +################################################################################ + +test_that(".get_rhs_var extracts group from formula", { + result <- .get_rhs_var(~Fat) + expect_equal(result, "Fat") +}) + +test_that(".get_rhs_var fails with non-formula", { + expect_error(.get_rhs_var("not a formula"), "must be a formula") +}) + +################################################################################ +# .check_group tests +################################################################################ + +test_that(".check_group validates at least 2 levels", { + df <- data.frame(group = c("A"), value = 1) + expect_error(.check_group(df, "group"), "at least 2 levels") +}) + +test_that(".check_group passes with valid 2-level group", { + df <- data.frame(group = c("A", "B"), value = 1:2) + expect_silent(.check_group(df, "group")) +}) + +################################################################################ +# .get_data tests +################################################################################ + +test_that(".get_data with assay.type returns correct format", { + df <- .get_data( + tse, "relabundance", NULL, NULL, "Fat", + NULL, NULL, NULL + ) + + expect_s3_class(df, "data.frame") + expect_true("relabundance" %in% names(df)) + expect_true("Fat" %in% names(df)) + expect_true("FeatureID" %in% names(df)) +}) + +test_that(".get_data with col.var returns correct format", { + tse_alpha <- mia::addAlpha(tse, index = "shannon_diversity") + df <- .get_data( + tse_alpha, NULL, NULL, "shannon_diversity", + "Fat", NULL, NULL, NULL + ) + + expect_s3_class(df, "data.frame") + expect_true("shannon_diversity" %in% names(df)) + expect_true("Fat" %in% names(df)) +}) diff --git a/vignettes/daa.Rmd b/vignettes/daa.Rmd index 86bb835..1348a11 100644 --- a/vignettes/daa.Rmd +++ b/vignettes/daa.Rmd @@ -1,19 +1,9 @@ --- title: "Introduction to daa" -author: - - name: Your name - affiliation: - - Your institution - email: your.email@somewhere.com output: BiocStyle::html_document: - self_contained: yes toc: true toc_float: true - toc_depth: 2 - code_folding: show -date: "`r doc_date()`" -package: "`r pkg_ver('daa')`" vignette: > %\VignetteIndexEntry{Introduction to daa} %\VignetteEngine{knitr::rmarkdown} @@ -22,113 +12,85 @@ vignette: > ```{r setup, include = FALSE} knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - crop = NULL ## Related to https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016656.html + collapse = TRUE, + comment = "#>" ) ``` +# Introduction -```{r vignetteSetup, echo=FALSE, message=FALSE, warning = FALSE} -## Bib setup -library("RefManageR") - -## Write bibliography information -bib <- c( - R = citation(), - BiocStyle = citation("BiocStyle")[1], - knitr = citation("knitr")[1], - RefManageR = citation("RefManageR")[1], - rmarkdown = citation("rmarkdown")[1], - sessioninfo = citation("sessioninfo")[1], - testthat = citation("testthat")[1], - daa = citation("daa")[1] -) -``` - -# Basics - -## Install `daa` +The **daa** package provides methods for differential abundance analysis (DAA) +of microbiome data. It works with `TreeSummarizedExperiment` and +`SummarizedExperiment` objects from the miaverse ecosystem. -`R` is an open-source statistical environment which can be easily modified to enhance its functionality via packages. `r Biocpkg("daa")` is a `R` package available via the [Bioconductor](http://bioconductor.org) repository for packages. `R` can be installed on any operating system from [CRAN](https://cran.r-project.org/) after which you can install `r Biocpkg("daa")` by using the following commands in your `R` session: +## Installation -```{r "install", eval = FALSE} +```{r install, eval = FALSE} if (!requireNamespace("BiocManager", quietly = TRUE)) { - install.packages("BiocManager") + install.packages("BiocManager") } BiocManager::install("daa") - -## Check that you have a valid Bioconductor installation -BiocManager::valid() ``` -## Required knowledge +# Available Methods -`r Biocpkg("daa")` is based on many other packages and in particular in those that have implemented the infrastructure needed for dealing with RNA-seq data (EDIT!). That is, packages like `r Biocpkg("SummarizedExperiment")` (EDIT!). +## Wilcoxon rank-sum test -If you are asking yourself the question "Where do I start using Bioconductor?" you might be interested in [this blog post](http://lcolladotor.github.io/2014/10/16/startbioc/#.VkOKbq6rRuU). +The `getWilcoxon()` function performs nonparametric Wilcoxon rank-sum tests +for each feature, comparing two groups. -## Asking for help +```{r wilcoxon, message = FALSE} +library(daa) +library(mia) -As package developers, we try to explain clearly how to use our packages and in which order to use the functions. But `R` and `Bioconductor` have a steep learning curve so it is critical to learn where to ask for help. The blog post quoted above mentions some but we would like to highlight the [Bioconductor support site](https://support.bioconductor.org/) as the main resource for getting help: remember to use the `daa` tag and check [the older posts](https://support.bioconductor.org/tag/daa/). Other alternatives are available such as creating GitHub issues and tweeting. However, please note that if you want to receive help you should adhere to the [posting guidelines](http://www.bioconductor.org/help/support/posting-guide/). It is particularly critical that you provide a small reproducible example and your session information so package developers can track down the source of the error. +# Load example data (HintikkaXOData is used in unit tests) +data(HintikkaXOData, package = "mia") +tse <- MultiAssayExperiment::getWithColData(HintikkaXOData, "microbiota") -## Citing `daa` +# Standardize assay data +tse <- transformAssay(tse, method = "relabundance") -We hope that `r Biocpkg("daa")` will be useful for your research. Please use the following information to cite the package and the overall approach. Thank you! +# Subset for demonstration +tse <- tse[1:50, ] -```{r "citation"} -## Citation info -citation("daa") +# Run Wilcoxon test comparing groups in 'Fat' metadata column +res <- getWilcoxon(tse, assay.type = "relabundance", formula = ~Fat) +head(res) ``` -# Quick start to using `daa` +Use `addWilcoxon()` to store results in `rowData()`: -```{r "start", message=FALSE} -library("daa") +```{r add_wilcoxon} +tse <- addWilcoxon(tse, assay.type = "relabundance", formula = ~Fat) +head(rowData(tse)[, "wilcoxon_padj", drop = FALSE]) ``` -Edit this as you see fit =) - -Here is an example of you can cite your package inside the vignette: - -* `r Biocpkg("daa")` `r Citep(bib[["daa"]])` +## t-test +The `getTtest()` function performs t-tests (Welch's by default). - -# Reproducibility - -The `r Biocpkg("daa")` package `r Citep(bib[["daa"]])` was made possible thanks to: - -* R `r Citep(bib[["R"]])` -* `r Biocpkg("BiocStyle")` `r Citep(bib[["BiocStyle"]])` -* `r CRANpkg("knitr")` `r Citep(bib[["knitr"]])` -* `r CRANpkg("RefManageR")` `r Citep(bib[["RefManageR"]])` -* `r CRANpkg("rmarkdown")` `r Citep(bib[["rmarkdown"]])` -* `r CRANpkg("sessioninfo")` `r Citep(bib[["sessioninfo"]])` -* `r CRANpkg("testthat")` `r Citep(bib[["testthat"]])` - -This package was developed using `r BiocStyle::Biocpkg("biocthis")`. - -`R` session information. - -```{r reproduce3, echo=FALSE} -## Session info -library("sessioninfo") -options(width = 120) -session_info() +```{r ttest} +res <- getTtest(tse, assay.type = "relabundance", formula = ~Fat) +head(res) ``` +## Testing metadata variables +You can also test variables from `colData` or `rowData`. For example, testing +alpha diversity indices: -# Bibliography +```{r metadata} +# Add alpha diversity to colData +tse <- mia::addAlpha(tse, index = "shannon_diversity") -This vignette was generated using `r Biocpkg("BiocStyle")` `r Citep(bib[["BiocStyle"]])` -with `r CRANpkg("knitr")` `r Citep(bib[["knitr"]])` and `r CRANpkg("rmarkdown")` `r Citep(bib[["rmarkdown"]])` running behind the scenes. +# Test diversity differences between groups +res <- getWilcoxon(tse, col.var = "shannon_diversity", formula = ~Fat) +head(res) +``` -Citations made with `r CRANpkg("RefManageR")` `r Citep(bib[["RefManageR"]])`. +# Session Info -```{r vignetteBiblio, results = "asis", echo = FALSE, warning = FALSE, message = FALSE} -## Print bibliography -PrintBibliography(bib, .opts = list(hyperlink = "to.doc", style = "html")) +```{r session} +sessionInfo() ```