Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ Maintainer: Venkatraman E. Seshan <seshanv@mskcc.org>
Description: Algorithm to implement Fraction and Allelic Copy number
Estimate from Tumor/normal Sequencing.
License: GPL (>= 2)
Depends: R (>= 3.0.0), pctGCdata (>= 0.3.0)
Depends: R (>= 3.0.0), pctGCdata (>= 0.3.0), tidyverse (>= 1.2.1)
Remotes: veseshan/pctGCdata
NeedsCompilation: yes
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
useDynLib(facets)
import(stats,graphics)
import("tidyverse")
importFrom("utils", "read.csv")
importFrom("grDevices", "colorRampPalette", "rainbow")
importFrom("pctGCdata", "getGCpct")
Expand Down
5 changes: 2 additions & 3 deletions R/facets-findDipLogR.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ findDiploidLogR <- function(out, cnlr) {
if (diff(dipLogR) < log2(1.25))
flags <- c(flags, "possibly subclonal 2+2 states present")
# cat("dipLogR =", dipLogR, "\n")
# print(out0)

# first remove the balanced segs
out1 <- out0[-bsegs,]
# is dipLogR[1] the 1+1 level; if so all lower cnlr.med values are losses
Expand Down Expand Up @@ -121,7 +121,6 @@ findDiploidLogR <- function(out, cnlr) {
out2 <- t(sapply(1:nrow(out1), function(i, dlr, out1) {
acnsplit(dlr, out1$cnlr.median[i], out1$mafR[i])
}, dipLogR[1], out1))
# print(out2)
out1$acn <- apply(out2[,2*(1:3), drop=FALSE], 1, which.min)
# proportion of genome that fits 1+0, 2+0 & 3+0
acn1prop <- sum(out1$num.mark[out1$acn == 1])/nsnps
Expand Down Expand Up @@ -207,7 +206,7 @@ findDiploidLogR <- function(out, cnlr) {
}
}
#plot(ocnlevels0, dev1, pch=16, col=colr)
#print(dev1)

cn2logR <- ocnlevels0[which.min(dev1)]
# if wgd.likely is non-null add dipLogR[1]
if (!is.null(wgd.likely)) {
Expand Down
82 changes: 41 additions & 41 deletions R/facets-procreads.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,33 @@
procSnps <- function(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, nX=23, unmatched=FALSE, ndepthmax=1000) {
# keep only chromsomes 1-22 & X for humans and 1-19, X for mice
# for other genomes (gbuild = udef) nX is number of autosomes plus 1
chromlevels <- c(1:(nX-1),"X")
chr.keep <- rcmat$Chromosome %in% chromlevels
# keep only snps with normal read depth between ndepth and 1000
depthN.keep <- (rcmat$NOR.DP >= ndepth) & (rcmat$NOR.DP < ndepthmax)
# reduce the data frame to these snps
rcmat <- rcmat[chr.keep & depthN.keep,]
# output data frame
out <- list()
out$chrom <- rcmat$Chromosome
out$maploc <- rcmat$Position
out$rCountT <- rcmat$TUM.DP
out$rCountN <- rcmat$NOR.DP
out$vafT <- 1 - rcmat$TUM.RD/rcmat$TUM.DP
out$vafN <- 1 - rcmat$NOR.RD/rcmat$NOR.DP
# make chromosome ordered and numeric
out$chrom <- as.numeric(ordered(out$chrom, levels=chromlevels))
rcmat <- rcmat %>%
tibble::add_column(vafT = 1 - rcmat$TUM.RD/rcmat$TUM.DP) %>%
tibble::add_column(vafN = 1 - rcmat$NOR.RD/rcmat$NOR.DP) %>%
dplyr::filter(Chromosome %in% c(1:(nX-1), "X")) %>%
dplyr::filter((NOR.DP >= ndepth) & (NOR.DP < ndepthmax)) %>%
dplyr::filter(TUM.DP>0) %>%
dplyr::rename(chrom = Chromosome) %>%
dplyr::rename(maploc = Position) %>%
dplyr::rename(rCountT = TUM.DP) %>%
dplyr::rename(rCountN = NOR.DP) %>%

dplyr::arrange(chrom)


# call a snp heterozygous if min(vafN, 1-mafN) > het.thresh
if (unmatched) {
if (het.thresh == 0.25) het.thresh <- 0.1
out$het <- 1*(pmin(out$vafT, 1-out$vafT) > het.thresh & out$rCountT >= 50)
rcmat$het <- 1*(pmin(rcmat$vafT, 1-rcmat$vafT) > het.thresh & rcmat$rCountT >= 50)
} else {
out$het <- 1*(pmin(out$vafN, 1-out$vafN) > het.thresh)
rcmat$het <- 1*(pmin(rcmat$vafN, 1-rcmat$vafN) > het.thresh)
}
# scan maploc for snps that are close to one another (within snp.nbhd bases)
# heep all the hets (should change if too close) and only one from a nbhd
out$keep <- scanSnp(out$maploc, out$het, snp.nbhd)
as.data.frame(out)
rcmat$keep <- scanSnp(rcmat$maploc, rcmat$het, snp.nbhd)
rcmat <- rcmat %>%
dplyr::filter(keep == 1)
as.data.frame(rcmat)
}

scanSnp <- function(maploc, het, nbhd) {
Expand All @@ -43,33 +43,33 @@ scanSnp <- function(maploc, het, nbhd) {
}

# obtain logR and logOR from read counts and GC-correct logR
counts2logROR <- function(mat, gbuild, unmatched=FALSE, ugcpct=NULL, f=0.2) {
out <- mat[mat$keep==1,]
counts2logROR <- function(mat, gbuild, unmatched=FALSE, ugcpct=NULL, f=0.2, nX=23) {
# gc percentage
out$gcpct <- rep(NA_real_, nrow(out))
mat$gcpct <- rep(NA_real_, nrow(mat))
# get GC percentages from pctGCdata package
# loop thru chromosomes
nchr <- max(mat$chrom) # IMPACT doesn't have X so only 22

nchr <- nX - 1 # IMPACT doesn't have X so only 22
for (i in 1:nchr) {
ii <- which(out$chrom==i)
ii <- which(mat$chrom==i)
# allow for chromosomes with no SNPs i.e. not targeted
if (length(ii) > 0) {
if (gbuild == "udef") {
out$gcpct[ii] <- getGCpct(i, out$maploc[ii], gbuild, ugcpct)
mat$gcpct[ii] <- getGCpct(i, mat$maploc[ii], gbuild, ugcpct)
} else {
out$gcpct[ii] <- getGCpct(i, out$maploc[ii], gbuild)
mat$gcpct[ii] <- getGCpct(i, mat$maploc[ii], gbuild)
}
}
}
##### log-ratio with gc correction and maf log-odds ratio steps
chrom <- out$chrom
maploc <- out$maploc
rCountN <- out$rCountN
rCountT <- out$rCountT
vafT <- out$vafT
vafN <- out$vafN
het <- out$het
gcpct <- out$gcpct
chrom <- mat$chrom
maploc <- mat$maploc
rCountN <- mat$rCountN
rCountT <- mat$rCountT
vafT <- mat$vafT
vafN <- mat$vafN
het <- mat$het
gcpct <- mat$gcpct
# compute gc bias
ncount <- tapply(rCountN, gcpct, sum)
tcount <- tapply(rCountT, gcpct, sum)
Expand Down Expand Up @@ -98,10 +98,10 @@ counts2logROR <- function(mat, gbuild, unmatched=FALSE, ugcpct=NULL, f=0.2) {
lorvar[het==1] <- (1/(rcmat[,1]+0.5) + 1/(rcmat[,2]+0.5) + 1/(rcmat[,3]+0.5) + 1/(rcmat[,4]+0.5))
}
# put them together
out$lorvar <- out$valor <- out$cnlr <- out$gcbias <- rep(NA_real_, nrow(out))
out$gcbias <- gcbias
out$cnlr <- cnlr
out$valor <- valor
out$lorvar <- lorvar
out
mat$lorvar <- mat$valor <- mat$cnlr <- mat$gcbias <- rep(NA_real_, nrow(mat))
mat$gcbias <- gcbias
mat$cnlr <- cnlr
mat$valor <- valor
mat$lorvar <- lorvar
mat
}
14 changes: 11 additions & 3 deletions R/facets-segment.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ prune.cpt.tree <- function(seg.tree, cval=25) {
}

# segment by looping over the chromosomes
segsnps <- function(mat, cval=25, hetscale=FALSE, delta=0) {
segsnps <- function(mat, cval=25, hetscale=FALSE, delta=0, nX=23) {
# keep the original data
mat0 <- mat
# keep only rows that have finite values for cnlr
Expand All @@ -137,7 +137,7 @@ segsnps <- function(mat, cval=25, hetscale=FALSE, delta=0) {
# initialize segment indicator
mat$seg <- rep(NA_real_, nrow(mat))
# loop over chromosomes
nchr <- max(mat$chrom) # IMPACT doesn't have X so only 22
nchr <- nX - 1 # IMPACT doesn't have X so only 22
# possible chromosomes
chrs <- 1:nchr
# initialize segmentation tree
Expand Down Expand Up @@ -169,15 +169,19 @@ segsnps <- function(mat, cval=25, hetscale=FALSE, delta=0) {
jointsegsummary <- function(jointseg) {
# remove snps with NA in segs (due to NA in cnlr)
jointseg <- jointseg[is.finite(jointseg$seg),]

# initialize output table
nsegs <- max(jointseg$seg)

# segment start and end indices and number of loci
seg.start <- which(diff(c(0,jointseg$seg))==1)
seg.start <- which(diff(c(0, jointseg$seg)) == 1)
seg.end <- c(seg.start[-1]-1, nrow(jointseg))
num.mark <- seg.end - seg.start + 1

# initialize the output
out <- as.data.frame(matrix(0, nsegs, 6))
names(out) <- c("chrom","seg","num.mark","nhet","cnlr.median","mafR")

# function to estimate maf from valor and lorvar
maffun <- function(x) {
# occasional extreme valor can screw maf. so winsorize the maf
Expand All @@ -191,8 +195,11 @@ jointsegsummary <- function(jointseg) {
valor[valor < valor.thresh] <- valor.thresh
sum(((valor)^2 - lorvar)/lorvar)/sum(1/lorvar)
}

# loop over the segments
for (seg in 1:nsegs) {
if (is.na(seg.start[seg]) && is.na(seg.end[seg]))
break
zhet <- jointseg$het[seg.start[seg]:seg.end[seg]]
out[seg, 1] <- jointseg$chrom[seg.start[seg]]
out[seg, 2] <- seg
Expand All @@ -202,6 +209,7 @@ jointsegsummary <- function(jointseg) {
if (out[seg, 4] > 0) {
zvalor <- jointseg$valor[seg.start[seg]:seg.end[seg]]
zlorvar <- jointseg$lorvar[seg.start[seg]:seg.end[seg]]

zz1 <- data.frame(valor=zvalor[zhet == 1], lorvar=zlorvar[zhet == 1])
out[seg, 6] <- maffun(zz1)
}
Expand Down
Loading