From 39e527d785c2f563af27ecf96ce7fb7504e4ee2b Mon Sep 17 00:00:00 2001 From: Pierre Marijon Date: Fri, 18 Jun 2021 11:58:18 +0200 Subject: [PATCH 1/2] Replace read.csv by readr::read_csv to save many memory --- DESCRIPTION | 2 +- NAMESPACE | 1 + R/facets-wrapper.R | 19 ++++++++++++++++++- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 42b3531..9661a55 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,6 +8,6 @@ Maintainer: Venkatraman E. Seshan Description: Algorithm to implement Fraction and Allelic Copy number Estimate from Tumor/normal Sequencing. License: GPL (>= 2) -Depends: R (>= 3.0.0), pctGCdata (>= 0.3.0) +Depends: R (>= 3.0.0), pctGCdata (>= 0.3.0), readr (>= 1.3.1) Remotes: veseshan/pctGCdata NeedsCompilation: yes diff --git a/NAMESPACE b/NAMESPACE index 936d491..669199c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ useDynLib(facets) import(stats,graphics) +import("readr") importFrom("utils", "read.csv") importFrom("grDevices", "colorRampPalette", "rainbow") importFrom("pctGCdata", "getGCpct") diff --git a/R/facets-wrapper.R b/R/facets-wrapper.R index 7b2de24..61b8d30 100644 --- a/R/facets-wrapper.R +++ b/R/facets-wrapper.R @@ -6,7 +6,24 @@ readSnpMatrix <- function(filename, skip=0L, err.thresh=Inf, del.thresh=Inf, per rcmat <- as.data.frame(rcmat, stringsAsFactors=FALSE) } else { # read the read count matrix generated by snp-pileup.cpp code - pileup <- read.csv(filename, stringsAsFactors=FALSE, colClasses=rep(c("character", "numeric","character", "numeric"), c(1,1,2,8))) + pileup <- readr::read_csv(filename, + col_types=cols( + Chromosome = col_character(), + Position = col_integer(), + Ref = col_character(), + Alt = col_character(), + File1R = col_integer(), + File1A = col_integer(), + File1E = col_integer(), + File1D = col_integer(), + File2R = col_integer(), + File2A = col_integer(), + File2E = col_integer(), + File2D = col_integer() + ), + guess_max=0 + ) + # remove chr if present in Chrom if (grepl("chr",pileup$Chromosome[1])) { pileup$Chromosome <- gsub("chr", "", pileup$Chromosome) From c14d8542ab92e15ad364ec544606594330972c06 Mon Sep 17 00:00:00 2001 From: Pierre Marijon Date: Fri, 18 Jun 2021 14:46:28 +0200 Subject: [PATCH 2/2] Use tidyverse pipe system to work on some dataframe --- DESCRIPTION | 2 +- NAMESPACE | 2 +- R/facets-findDipLogR.R | 5 ++- R/facets-procreads.R | 82 +++++++++++++++++++++--------------------- R/facets-segment.R | 14 ++++++-- R/facets-wrapper.R | 72 ++++++++++++++++++++++++++++--------- 6 files changed, 112 insertions(+), 65 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9661a55..2d0d540 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,6 +8,6 @@ Maintainer: Venkatraman E. Seshan Description: Algorithm to implement Fraction and Allelic Copy number Estimate from Tumor/normal Sequencing. License: GPL (>= 2) -Depends: R (>= 3.0.0), pctGCdata (>= 0.3.0), readr (>= 1.3.1) +Depends: R (>= 3.0.0), pctGCdata (>= 0.3.0), tidyverse (>= 1.2.1) Remotes: veseshan/pctGCdata NeedsCompilation: yes diff --git a/NAMESPACE b/NAMESPACE index 669199c..ececd72 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,6 @@ useDynLib(facets) import(stats,graphics) -import("readr") +import("tidyverse") importFrom("utils", "read.csv") importFrom("grDevices", "colorRampPalette", "rainbow") importFrom("pctGCdata", "getGCpct") diff --git a/R/facets-findDipLogR.R b/R/facets-findDipLogR.R index 4d4c4dc..aecc851 100644 --- a/R/facets-findDipLogR.R +++ b/R/facets-findDipLogR.R @@ -93,7 +93,7 @@ findDiploidLogR <- function(out, cnlr) { if (diff(dipLogR) < log2(1.25)) flags <- c(flags, "possibly subclonal 2+2 states present") # cat("dipLogR =", dipLogR, "\n") - # print(out0) + # first remove the balanced segs out1 <- out0[-bsegs,] # is dipLogR[1] the 1+1 level; if so all lower cnlr.med values are losses @@ -121,7 +121,6 @@ findDiploidLogR <- function(out, cnlr) { out2 <- t(sapply(1:nrow(out1), function(i, dlr, out1) { acnsplit(dlr, out1$cnlr.median[i], out1$mafR[i]) }, dipLogR[1], out1)) - # print(out2) out1$acn <- apply(out2[,2*(1:3), drop=FALSE], 1, which.min) # proportion of genome that fits 1+0, 2+0 & 3+0 acn1prop <- sum(out1$num.mark[out1$acn == 1])/nsnps @@ -207,7 +206,7 @@ findDiploidLogR <- function(out, cnlr) { } } #plot(ocnlevels0, dev1, pch=16, col=colr) - #print(dev1) + cn2logR <- ocnlevels0[which.min(dev1)] # if wgd.likely is non-null add dipLogR[1] if (!is.null(wgd.likely)) { diff --git a/R/facets-procreads.R b/R/facets-procreads.R index 85b8540..af8cdc1 100644 --- a/R/facets-procreads.R +++ b/R/facets-procreads.R @@ -2,33 +2,33 @@ procSnps <- function(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, nX=23, unmatched=FALSE, ndepthmax=1000) { # keep only chromsomes 1-22 & X for humans and 1-19, X for mice # for other genomes (gbuild = udef) nX is number of autosomes plus 1 - chromlevels <- c(1:(nX-1),"X") - chr.keep <- rcmat$Chromosome %in% chromlevels - # keep only snps with normal read depth between ndepth and 1000 - depthN.keep <- (rcmat$NOR.DP >= ndepth) & (rcmat$NOR.DP < ndepthmax) - # reduce the data frame to these snps - rcmat <- rcmat[chr.keep & depthN.keep,] - # output data frame - out <- list() - out$chrom <- rcmat$Chromosome - out$maploc <- rcmat$Position - out$rCountT <- rcmat$TUM.DP - out$rCountN <- rcmat$NOR.DP - out$vafT <- 1 - rcmat$TUM.RD/rcmat$TUM.DP - out$vafN <- 1 - rcmat$NOR.RD/rcmat$NOR.DP - # make chromosome ordered and numeric - out$chrom <- as.numeric(ordered(out$chrom, levels=chromlevels)) + rcmat <- rcmat %>% + tibble::add_column(vafT = 1 - rcmat$TUM.RD/rcmat$TUM.DP) %>% + tibble::add_column(vafN = 1 - rcmat$NOR.RD/rcmat$NOR.DP) %>% + dplyr::filter(Chromosome %in% c(1:(nX-1), "X")) %>% + dplyr::filter((NOR.DP >= ndepth) & (NOR.DP < ndepthmax)) %>% + dplyr::filter(TUM.DP>0) %>% + dplyr::rename(chrom = Chromosome) %>% + dplyr::rename(maploc = Position) %>% + dplyr::rename(rCountT = TUM.DP) %>% + dplyr::rename(rCountN = NOR.DP) %>% + + dplyr::arrange(chrom) + + # call a snp heterozygous if min(vafN, 1-mafN) > het.thresh if (unmatched) { if (het.thresh == 0.25) het.thresh <- 0.1 - out$het <- 1*(pmin(out$vafT, 1-out$vafT) > het.thresh & out$rCountT >= 50) + rcmat$het <- 1*(pmin(rcmat$vafT, 1-rcmat$vafT) > het.thresh & rcmat$rCountT >= 50) } else { - out$het <- 1*(pmin(out$vafN, 1-out$vafN) > het.thresh) + rcmat$het <- 1*(pmin(rcmat$vafN, 1-rcmat$vafN) > het.thresh) } # scan maploc for snps that are close to one another (within snp.nbhd bases) # heep all the hets (should change if too close) and only one from a nbhd - out$keep <- scanSnp(out$maploc, out$het, snp.nbhd) - as.data.frame(out) + rcmat$keep <- scanSnp(rcmat$maploc, rcmat$het, snp.nbhd) + rcmat <- rcmat %>% + dplyr::filter(keep == 1) + as.data.frame(rcmat) } scanSnp <- function(maploc, het, nbhd) { @@ -43,33 +43,33 @@ scanSnp <- function(maploc, het, nbhd) { } # obtain logR and logOR from read counts and GC-correct logR -counts2logROR <- function(mat, gbuild, unmatched=FALSE, ugcpct=NULL, f=0.2) { - out <- mat[mat$keep==1,] +counts2logROR <- function(mat, gbuild, unmatched=FALSE, ugcpct=NULL, f=0.2, nX=23) { # gc percentage - out$gcpct <- rep(NA_real_, nrow(out)) + mat$gcpct <- rep(NA_real_, nrow(mat)) # get GC percentages from pctGCdata package # loop thru chromosomes - nchr <- max(mat$chrom) # IMPACT doesn't have X so only 22 + + nchr <- nX - 1 # IMPACT doesn't have X so only 22 for (i in 1:nchr) { - ii <- which(out$chrom==i) + ii <- which(mat$chrom==i) # allow for chromosomes with no SNPs i.e. not targeted if (length(ii) > 0) { if (gbuild == "udef") { - out$gcpct[ii] <- getGCpct(i, out$maploc[ii], gbuild, ugcpct) + mat$gcpct[ii] <- getGCpct(i, mat$maploc[ii], gbuild, ugcpct) } else { - out$gcpct[ii] <- getGCpct(i, out$maploc[ii], gbuild) + mat$gcpct[ii] <- getGCpct(i, mat$maploc[ii], gbuild) } } } ##### log-ratio with gc correction and maf log-odds ratio steps - chrom <- out$chrom - maploc <- out$maploc - rCountN <- out$rCountN - rCountT <- out$rCountT - vafT <- out$vafT - vafN <- out$vafN - het <- out$het - gcpct <- out$gcpct + chrom <- mat$chrom + maploc <- mat$maploc + rCountN <- mat$rCountN + rCountT <- mat$rCountT + vafT <- mat$vafT + vafN <- mat$vafN + het <- mat$het + gcpct <- mat$gcpct # compute gc bias ncount <- tapply(rCountN, gcpct, sum) tcount <- tapply(rCountT, gcpct, sum) @@ -98,10 +98,10 @@ counts2logROR <- function(mat, gbuild, unmatched=FALSE, ugcpct=NULL, f=0.2) { lorvar[het==1] <- (1/(rcmat[,1]+0.5) + 1/(rcmat[,2]+0.5) + 1/(rcmat[,3]+0.5) + 1/(rcmat[,4]+0.5)) } # put them together - out$lorvar <- out$valor <- out$cnlr <- out$gcbias <- rep(NA_real_, nrow(out)) - out$gcbias <- gcbias - out$cnlr <- cnlr - out$valor <- valor - out$lorvar <- lorvar - out + mat$lorvar <- mat$valor <- mat$cnlr <- mat$gcbias <- rep(NA_real_, nrow(mat)) + mat$gcbias <- gcbias + mat$cnlr <- cnlr + mat$valor <- valor + mat$lorvar <- lorvar + mat } diff --git a/R/facets-segment.R b/R/facets-segment.R index b864a19..ae1a8b1 100644 --- a/R/facets-segment.R +++ b/R/facets-segment.R @@ -117,7 +117,7 @@ prune.cpt.tree <- function(seg.tree, cval=25) { } # segment by looping over the chromosomes -segsnps <- function(mat, cval=25, hetscale=FALSE, delta=0) { +segsnps <- function(mat, cval=25, hetscale=FALSE, delta=0, nX=23) { # keep the original data mat0 <- mat # keep only rows that have finite values for cnlr @@ -137,7 +137,7 @@ segsnps <- function(mat, cval=25, hetscale=FALSE, delta=0) { # initialize segment indicator mat$seg <- rep(NA_real_, nrow(mat)) # loop over chromosomes - nchr <- max(mat$chrom) # IMPACT doesn't have X so only 22 + nchr <- nX - 1 # IMPACT doesn't have X so only 22 # possible chromosomes chrs <- 1:nchr # initialize segmentation tree @@ -169,15 +169,19 @@ segsnps <- function(mat, cval=25, hetscale=FALSE, delta=0) { jointsegsummary <- function(jointseg) { # remove snps with NA in segs (due to NA in cnlr) jointseg <- jointseg[is.finite(jointseg$seg),] + # initialize output table nsegs <- max(jointseg$seg) + # segment start and end indices and number of loci - seg.start <- which(diff(c(0,jointseg$seg))==1) + seg.start <- which(diff(c(0, jointseg$seg)) == 1) seg.end <- c(seg.start[-1]-1, nrow(jointseg)) num.mark <- seg.end - seg.start + 1 + # initialize the output out <- as.data.frame(matrix(0, nsegs, 6)) names(out) <- c("chrom","seg","num.mark","nhet","cnlr.median","mafR") + # function to estimate maf from valor and lorvar maffun <- function(x) { # occasional extreme valor can screw maf. so winsorize the maf @@ -191,8 +195,11 @@ jointsegsummary <- function(jointseg) { valor[valor < valor.thresh] <- valor.thresh sum(((valor)^2 - lorvar)/lorvar)/sum(1/lorvar) } + # loop over the segments for (seg in 1:nsegs) { + if (is.na(seg.start[seg]) && is.na(seg.end[seg])) + break zhet <- jointseg$het[seg.start[seg]:seg.end[seg]] out[seg, 1] <- jointseg$chrom[seg.start[seg]] out[seg, 2] <- seg @@ -202,6 +209,7 @@ jointsegsummary <- function(jointseg) { if (out[seg, 4] > 0) { zvalor <- jointseg$valor[seg.start[seg]:seg.end[seg]] zlorvar <- jointseg$lorvar[seg.start[seg]:seg.end[seg]] + zz1 <- data.frame(valor=zvalor[zhet == 1], lorvar=zlorvar[zhet == 1]) out[seg, 6] <- maffun(zz1) } diff --git a/R/facets-wrapper.R b/R/facets-wrapper.R index 61b8d30..d162256 100644 --- a/R/facets-wrapper.R +++ b/R/facets-wrapper.R @@ -10,8 +10,8 @@ readSnpMatrix <- function(filename, skip=0L, err.thresh=Inf, del.thresh=Inf, per col_types=cols( Chromosome = col_character(), Position = col_integer(), - Ref = col_character(), - Alt = col_character(), + Ref = col_skip(), + Alt = col_skip(), File1R = col_integer(), File1A = col_integer(), File1E = col_integer(), @@ -28,15 +28,17 @@ readSnpMatrix <- function(filename, skip=0L, err.thresh=Inf, del.thresh=Inf, per if (grepl("chr",pileup$Chromosome[1])) { pileup$Chromosome <- gsub("chr", "", pileup$Chromosome) } - # remove loci where errors and deletions exceeded thresholds - ii <- which(pileup$File1E <= err.thresh & pileup$File1D <= del.thresh & pileup$File2E <= err.thresh & pileup$File2D <= del.thresh) - rcmat <- pileup[ii, 1:2] - rcmat$NOR.DP <- pileup$File1R[ii] + pileup$File1A[ii] - rcmat$NOR.RD <- pileup$File1R[ii] - rcmat$TUM.DP <- pileup$File2R[ii] + pileup$File2A[ii] - rcmat$TUM.RD <- pileup$File2R[ii] + + rcmat <- pileup %>% + dplyr::filter(pileup$File1E <= err.thresh & pileup$File1D <= del.thresh & pileup$File2E <= err.thresh & pileup$File2D <= del.thresh) %>% + tibble::add_column(NOR.DP = pileup$File1R + pileup$File1A) %>% + dplyr::rename(NOR.RD = File1R) %>% + tibble::add_column(TUM.DP = pileup$File2R + pileup$File2A) %>% + dplyr::rename(TUM.RD = File2R) %>% + dplyr::select(-contains("File")) } - rcmat + + as.data.frame(rcmat) } preProcSample <- function(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, cval=25, deltaCN=0, gbuild=c("hg19", "hg38", "hg18", "mm9", "mm10", "udef"), ugcpct=NULL, hetscale=TRUE, unmatched=FALSE, ndepthmax=1000) { @@ -51,33 +53,42 @@ preProcSample <- function(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, cval= nX <- length(ugcpct) } } - pmat <- procSnps(rcmat, ndepth, het.thresh, snp.nbhd, nX, unmatched, ndepthmax) + + rcmat <- procSnps(rcmat, ndepth, het.thresh, snp.nbhd, nX, unmatched, ndepthmax) + if (gbuild == "udef") { - dmat <- counts2logROR(pmat[pmat$rCountT>0,], gbuild, unmatched, ugcpct) + dmat <- counts2logROR(rcmat, gbuild, unmatched, ugcpct, nX=nX) } else { - dmat <- counts2logROR(pmat[pmat$rCountT>0,], gbuild, unmatched) + dmat <- counts2logROR(rcmat, gbuild, unmatched, nX=nX) } + tmp <- segsnps(dmat, cval, hetscale, deltaCN) - out <- list(pmat=pmat, gbuild=gbuild, nX=nX) + out <- list(pmat=rcmat, gbuild=gbuild, nX=nX) c(out, tmp) } procSample <- function(x, cval=150, min.nhet=15, dipLogR=NULL) { # ensure availability of seg.tree if (is.null(x$seg.tree)) stop("seg.tree is not available") + # get the numeric value of chromosome X nX <- x$nX + # make sure that original cval is smaller than current one cval.fit <- attr(x$seg.tree, "cval") if (cval.fit > cval) stop("original fit used cval = ", cval.fit) + # jointseg etc jseg <- x$jointseg jseg <- jseg[is.finite(jseg$cnlr),] + # chromosomes with data and their counts chrs <- x$chromlevels nchr <- length(chrs) + # get chromlevels from chrs chromlevels <- c(1:(nX-1), "X")[chrs] + # get the segment summary for the fit in seg.tree nsegs <- 0 # jointseg already has a seg variable numbered 1 thru number of segments for each chromosome @@ -86,13 +97,17 @@ procSample <- function(x, cval=150, min.nhet=15, dipLogR=NULL) { nsegs <- max(jseg$seg[jseg$chrom==chrs[i]]) } focalout <- jointsegsummary(jseg) + # cnlr.median to the left and right cnlr.med.l <- c(0, focalout$cnlr.median[-nsegs]) cnlr.med.r <- c(focalout$cnlr.median[-1], 0) + # mad of cnlr noise cnlr.mad <- mad(jseg$cnlr - rep(focalout$cnlr.median, focalout$num.mark)) + # segments that show focal changes have big jump in cnlr.median focalout$focal <- 1*(focalout$cnlr.median > pmax(cnlr.med.l, cnlr.med.r)+3*cnlr.mad) + 1*(focalout$cnlr.median < pmin(cnlr.med.l, cnlr.med.r)-3*cnlr.mad) + # get the segments for the specified cval nsegs <- 0 for (i in 1:nchr) { @@ -100,14 +115,19 @@ procSample <- function(x, cval=150, min.nhet=15, dipLogR=NULL) { jseg$seg[jseg$chrom==chrs[i]] <- nsegs + rep(1:length(seg.widths), seg.widths) nsegs <- nsegs + length(seg.widths) } + # adding the focal change segments - need a jump at the beginning and end jseg$seg0 <- jseg$seg # detected segments + # jump at the beginning (twice the height) jseg$seg <- jseg$seg + rep(cumsum(2*focalout$focal), focalout$num.mark) + # drop back for the focal segment to get the steps right jseg$seg <- jseg$seg - rep(focalout$focal, focalout$num.mark) + # focal segment could already be in; so change seg indicator jseg$seg <- cumsum(c(1, 1*(diff(jseg$seg) > 0))) + # segment summaries out <- jointsegsummary(jseg) # cluster the segments @@ -128,18 +148,23 @@ procSample <- function(x, cval=150, min.nhet=15, dipLogR=NULL) { plotSample <- function(x, emfit=NULL, clustered=FALSE, plot.type=c("em","naive","both","none"), sname=NULL) { def.par <- par(no.readonly = TRUE) # save default, for resetting... + # plot.type plot.type <- match.arg(plot.type) + # layout of multi panel figure if (plot.type=="none") layout(matrix(1:2, ncol=1)) if (plot.type=="em") layout(matrix(rep(1:4, c(9,9,6,1)), ncol=1)) if (plot.type=="naive") layout(matrix(rep(1:4, c(9,9,6,1)), ncol=1)) if (plot.type=="both") layout(matrix(rep(1:6, c(9,9,6,1,6,1)), ncol=1)) par(mar=c(0.25,3,0.25,1), mgp=c(1.75, 0.6, 0), oma=c(3,0,1.25,0)) + # raw data used for joint segmentation jseg <- x$jointseg + # chromosome boundaries - chrbdry <- which(diff(jseg$chrom) != 0) + chrbdry <- which(diff(as.numeric(jseg$chr[jseg$chr != "X"])) != 0) + if (missing(emfit)) { out <- x$out if (plot.type=="em" | plot.type=="both") { @@ -153,6 +178,11 @@ plotSample <- function(x, emfit=NULL, clustered=FALSE, plot.type=c("em","naive", out$lcn <- x$out$lcn out$cf <- x$out$cf } + + # Filter out chr X + out <- out[out$chr != 'X',] + out$chr = as.numeric(out$chr) + # determine which of the cnlr.median & mafR to show if (clustered) { cnlr.median <- out$cnlr.median.clust @@ -163,23 +193,27 @@ plotSample <- function(x, emfit=NULL, clustered=FALSE, plot.type=c("em","naive", mafR <- out$mafR } mafR <- abs(mafR) + # chromosome colors chrcol <- 1+rep(out$chr-2*floor(out$chr/2), out$num.mark) nn <- cumsum(table(jseg$chrom[is.finite(jseg$cnlr)])) segbdry <- cumsum(c(0,out$num.mark)) segstart <- segbdry[-length(segbdry)] segend <- segbdry[-1] + # plot the logR data and segment medians plot(jseg$cnlr[is.finite(jseg$cnlr)], pch=".", cex=2, col = c("grey","lightblue","azure4","slateblue")[chrcol], ylab="log-ratio", xaxt="n") abline(v=chrbdry, lwd=0.25) abline(h=median(jseg$cnlr, na.rm=TRUE), col="green2") abline(h = x$dipLogR, col = "magenta4") segments(segstart, cnlr.median, segend, cnlr.median, lwd=1.75, col=2) + # plot the logOR data and mafR plot(jseg$valor[is.finite(jseg$cnlr)], pch=".", cex=2.5, col = c("grey","lightblue","azure4","slateblue")[chrcol], ylab="log-odds-ratio", ylim=c(-4,4), xaxt="n") abline(v=chrbdry, lwd=0.25) segments(segstart, sqrt(mafR), segend, sqrt(mafR), lwd=1.75, col=2) segments(segstart, -sqrt(mafR), segend, -sqrt(mafR), lwd=1.75, col=2) + # naive copy number and cellular faction pieces cfpalette <- c(colorRampPalette(c("white", "steelblue"))(10),"bisque2") if (plot.type=="naive" | plot.type=="both") { @@ -197,6 +231,7 @@ plotSample <- function(x, emfit=NULL, clustered=FALSE, plot.type=c("em","naive", cfcol <- cfpalette[round(10*out$cf+0.501)] rect(segstart, 0, segend, 1, col=cfcol, border=NA) } + # EM copy number and cellular faction pieces if (plot.type=="em" | plot.type=="both") { # plot the estimated copy numbers and cf @@ -216,10 +251,12 @@ plotSample <- function(x, emfit=NULL, clustered=FALSE, plot.type=c("em","naive", # now add the chromosome ticks on x-axis chromlevels <- x$chromlevels + # just make sure chromlevels actually exists if (is.null(chromlevels)) chromlevels <- 1:length(nn) axis(labels=chromlevels, side=1, at=(nn+c(0,nn[-length(nn)]))/2, cex=0.65) mtext(side=1, line=1.75, "Chromosome", cex=0.8) + if (!missing(sname)) mtext(sname, side=3, line=0, outer=TRUE, cex=0.8) par(def.par) #- reset to default } @@ -228,11 +265,14 @@ logRlogORspider <- function(cncf, dipLogR=0, nfrac=0.005) { rho <- seq(0, 0.95, by=0.01) nrho <- length(rho) logACR <- logCNR <- matrix(0, nrho, 19) + # initialize index l <- 1 + # one copy loss logCNR[,l] <- log2(2*(1-rho) + 1*rho) -1 logACR[,l] <- log(1/(1-rho)) + # integer copy numbers (clonal) for(i in 2:7) { for(j in 0:floor(i/2)) { @@ -260,6 +300,6 @@ logRlogORspider <- function(cncf, dipLogR=0, nfrac=0.005) { ii <- cncf$num.mark > nfrac*nsnps & cncf$nhet > nfrac*nhets cex <- 0.3 + 2.7*(cncf$num.mark[ii]/sum(0.1*cncf$num.mark[ii])) chrcol <- rainbow(24) - points(cncf$cnlr.median[ii] - dipLogR, sqrt(abs(cncf$mafR[ii])), cex=cex, pch=10, col=chrcol[cncf$chrom[ii]], lwd=1.5) + points(cncf$cnlr.median[ii] - dipLogR, sqrt(abs(cncf$mafR[ii])), cex=cex, pch=10, col=chrcol[as.numeric(cncf$chrom[ii])], lwd=1.5) legend(-1, 5.25, paste("chr", c(1:22, "X"), sep=""), ncol=4, pch=10, col=chrcol[1:23], cex=0.65) }