From 39e527d785c2f563af27ecf96ce7fb7504e4ee2b Mon Sep 17 00:00:00 2001
From: Pierre Marijon <pierre.marijon-ext@aphp.fr>
Date: Fri, 18 Jun 2021 11:58:18 +0200
Subject: [PATCH 1/2] Replace read.csv by readr::read_csv to save many memory

---
 DESCRIPTION        |  2 +-
 NAMESPACE          |  1 +
 R/facets-wrapper.R | 19 ++++++++++++++++++-
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 42b3531..9661a55 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -8,6 +8,6 @@ Maintainer: Venkatraman E. Seshan <seshanv@mskcc.org>
 Description: Algorithm to implement Fraction and Allelic Copy number
              Estimate from Tumor/normal Sequencing.
 License: GPL (>= 2)
-Depends: R (>= 3.0.0), pctGCdata (>= 0.3.0)
+Depends: R (>= 3.0.0), pctGCdata (>= 0.3.0), readr (>= 1.3.1)
 Remotes: veseshan/pctGCdata
 NeedsCompilation: yes
diff --git a/NAMESPACE b/NAMESPACE
index 936d491..669199c 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,6 @@
 useDynLib(facets)
 import(stats,graphics)
+import("readr")
 importFrom("utils", "read.csv")
 importFrom("grDevices", "colorRampPalette", "rainbow")
 importFrom("pctGCdata", "getGCpct")
diff --git a/R/facets-wrapper.R b/R/facets-wrapper.R
index 7b2de24..61b8d30 100644
--- a/R/facets-wrapper.R
+++ b/R/facets-wrapper.R
@@ -6,7 +6,24 @@ readSnpMatrix <- function(filename, skip=0L, err.thresh=Inf, del.thresh=Inf, per
         rcmat <- as.data.frame(rcmat, stringsAsFactors=FALSE)
     } else {
         # read the read count matrix generated by snp-pileup.cpp code
-        pileup <- read.csv(filename, stringsAsFactors=FALSE, colClasses=rep(c("character", "numeric","character", "numeric"), c(1,1,2,8)))
+        pileup <- readr::read_csv(filename,
+                                  col_types=cols(
+                                      Chromosome = col_character(),
+                                      Position = col_integer(),
+                                      Ref = col_character(),
+                                      Alt = col_character(),
+                                      File1R = col_integer(),
+                                      File1A = col_integer(),
+                                      File1E = col_integer(),
+                                      File1D = col_integer(),
+                                      File2R = col_integer(),
+                                      File2A = col_integer(),
+                                      File2E = col_integer(),
+                                      File2D = col_integer()
+                                  ),
+                                  guess_max=0
+                                  )
+
         # remove chr if present in Chrom
         if (grepl("chr",pileup$Chromosome[1])) {
             pileup$Chromosome <- gsub("chr", "", pileup$Chromosome)

From c14d8542ab92e15ad364ec544606594330972c06 Mon Sep 17 00:00:00 2001
From: Pierre Marijon <pierre.marijon-ext@aphp.fr>
Date: Fri, 18 Jun 2021 14:46:28 +0200
Subject: [PATCH 2/2] Use tidyverse pipe system to work on some dataframe

---
 DESCRIPTION            |  2 +-
 NAMESPACE              |  2 +-
 R/facets-findDipLogR.R |  5 ++-
 R/facets-procreads.R   | 82 +++++++++++++++++++++---------------------
 R/facets-segment.R     | 14 ++++++--
 R/facets-wrapper.R     | 72 ++++++++++++++++++++++++++++---------
 6 files changed, 112 insertions(+), 65 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 9661a55..2d0d540 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -8,6 +8,6 @@ Maintainer: Venkatraman E. Seshan <seshanv@mskcc.org>
 Description: Algorithm to implement Fraction and Allelic Copy number
              Estimate from Tumor/normal Sequencing.
 License: GPL (>= 2)
-Depends: R (>= 3.0.0), pctGCdata (>= 0.3.0), readr (>= 1.3.1)
+Depends: R (>= 3.0.0), pctGCdata (>= 0.3.0), tidyverse (>= 1.2.1)
 Remotes: veseshan/pctGCdata
 NeedsCompilation: yes
diff --git a/NAMESPACE b/NAMESPACE
index 669199c..ececd72 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,6 +1,6 @@
 useDynLib(facets)
 import(stats,graphics)
-import("readr")
+import("tidyverse")
 importFrom("utils", "read.csv")
 importFrom("grDevices", "colorRampPalette", "rainbow")
 importFrom("pctGCdata", "getGCpct")
diff --git a/R/facets-findDipLogR.R b/R/facets-findDipLogR.R
index 4d4c4dc..aecc851 100644
--- a/R/facets-findDipLogR.R
+++ b/R/facets-findDipLogR.R
@@ -93,7 +93,7 @@ findDiploidLogR <- function(out, cnlr) {
         if (diff(dipLogR) < log2(1.25)) 
             flags <- c(flags, "possibly subclonal 2+2 states present")
     # cat("dipLogR =", dipLogR, "\n")
-    # print(out0)
+
     # first remove the balanced segs
     out1 <- out0[-bsegs,]
     # is dipLogR[1] the 1+1 level; if so all lower cnlr.med values are losses
@@ -121,7 +121,6 @@ findDiploidLogR <- function(out, cnlr) {
         out2 <- t(sapply(1:nrow(out1), function(i, dlr, out1) {
                              acnsplit(dlr, out1$cnlr.median[i], out1$mafR[i])
                          }, dipLogR[1], out1))
-        # print(out2)
         out1$acn <- apply(out2[,2*(1:3), drop=FALSE], 1, which.min)
         # proportion of genome that fits 1+0, 2+0 & 3+0
         acn1prop <- sum(out1$num.mark[out1$acn == 1])/nsnps
@@ -207,7 +206,7 @@ findDiploidLogR <- function(out, cnlr) {
             }
         }
         #plot(ocnlevels0, dev1, pch=16, col=colr)
-        #print(dev1)
+
         cn2logR <- ocnlevels0[which.min(dev1)]
         # if wgd.likely is non-null add dipLogR[1]
         if (!is.null(wgd.likely)) {
diff --git a/R/facets-procreads.R b/R/facets-procreads.R
index 85b8540..af8cdc1 100644
--- a/R/facets-procreads.R
+++ b/R/facets-procreads.R
@@ -2,33 +2,33 @@
 procSnps <- function(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, nX=23, unmatched=FALSE, ndepthmax=1000) {
     # keep only chromsomes 1-22 & X for humans and 1-19, X for mice
     # for other genomes (gbuild = udef) nX is number of autosomes plus 1
-    chromlevels <- c(1:(nX-1),"X")
-    chr.keep <- rcmat$Chromosome %in% chromlevels
-    # keep only snps with normal read depth between ndepth and 1000
-    depthN.keep <- (rcmat$NOR.DP >= ndepth) & (rcmat$NOR.DP < ndepthmax)
-    # reduce the data frame to these snps
-    rcmat <- rcmat[chr.keep & depthN.keep,]
-    # output data frame
-    out <- list()
-    out$chrom <- rcmat$Chromosome
-    out$maploc <- rcmat$Position
-    out$rCountT <- rcmat$TUM.DP
-    out$rCountN <- rcmat$NOR.DP
-    out$vafT <- 1 - rcmat$TUM.RD/rcmat$TUM.DP
-    out$vafN <- 1 - rcmat$NOR.RD/rcmat$NOR.DP
-    # make chromosome ordered and numeric
-    out$chrom <- as.numeric(ordered(out$chrom, levels=chromlevels))
+    rcmat <- rcmat %>%
+        tibble::add_column(vafT = 1 - rcmat$TUM.RD/rcmat$TUM.DP) %>%
+        tibble::add_column(vafN = 1 - rcmat$NOR.RD/rcmat$NOR.DP) %>%
+        dplyr::filter(Chromosome %in% c(1:(nX-1), "X")) %>%
+        dplyr::filter((NOR.DP >= ndepth) & (NOR.DP < ndepthmax)) %>%
+        dplyr::filter(TUM.DP>0) %>%
+        dplyr::rename(chrom = Chromosome) %>%
+        dplyr::rename(maploc = Position) %>%
+        dplyr::rename(rCountT = TUM.DP) %>%
+        dplyr::rename(rCountN = NOR.DP) %>%
+
+        dplyr::arrange(chrom)
+    
+
     # call a snp heterozygous if min(vafN, 1-mafN) > het.thresh
     if (unmatched) {
         if (het.thresh == 0.25) het.thresh <- 0.1
-        out$het <- 1*(pmin(out$vafT, 1-out$vafT) > het.thresh & out$rCountT >= 50)
+        rcmat$het <- 1*(pmin(rcmat$vafT, 1-rcmat$vafT) > het.thresh & rcmat$rCountT >= 50)
     } else {
-        out$het <- 1*(pmin(out$vafN, 1-out$vafN) > het.thresh)
+        rcmat$het <- 1*(pmin(rcmat$vafN, 1-rcmat$vafN) > het.thresh)
     }
     # scan maploc for snps that are close to one another (within snp.nbhd bases)
     # heep all the hets (should change if too close) and only one from a nbhd
-    out$keep <- scanSnp(out$maploc, out$het, snp.nbhd)
-    as.data.frame(out)
+    rcmat$keep <- scanSnp(rcmat$maploc, rcmat$het, snp.nbhd)
+    rcmat <- rcmat %>%
+        dplyr::filter(keep == 1)
+    as.data.frame(rcmat)
 }
 
 scanSnp <- function(maploc, het, nbhd) {
@@ -43,33 +43,33 @@ scanSnp <- function(maploc, het, nbhd) {
 }
 
 # obtain logR and logOR from read counts and GC-correct logR
-counts2logROR <- function(mat, gbuild, unmatched=FALSE, ugcpct=NULL, f=0.2) {
-    out <- mat[mat$keep==1,]
+counts2logROR <- function(mat, gbuild, unmatched=FALSE, ugcpct=NULL, f=0.2, nX=23) {
     # gc percentage
-    out$gcpct <- rep(NA_real_, nrow(out))
+    mat$gcpct <- rep(NA_real_, nrow(mat))
     # get GC percentages from pctGCdata package
     # loop thru chromosomes
-    nchr <- max(mat$chrom) # IMPACT doesn't have X so only 22
+
+    nchr <- nX - 1 # IMPACT doesn't have X so only 22
     for (i in 1:nchr) {
-        ii <- which(out$chrom==i)
+        ii <- which(mat$chrom==i)
         # allow for chromosomes with no SNPs i.e. not targeted
         if (length(ii) > 0) {
             if (gbuild == "udef") {
-                out$gcpct[ii] <- getGCpct(i, out$maploc[ii], gbuild, ugcpct)
+                mat$gcpct[ii] <- getGCpct(i, mat$maploc[ii], gbuild, ugcpct)
             } else {
-                out$gcpct[ii] <- getGCpct(i, out$maploc[ii], gbuild)
+                mat$gcpct[ii] <- getGCpct(i, mat$maploc[ii], gbuild)
             }
         }
     }
     ##### log-ratio with gc correction and maf log-odds ratio steps
-    chrom <- out$chrom
-    maploc <- out$maploc
-    rCountN <- out$rCountN
-    rCountT <- out$rCountT
-    vafT <- out$vafT
-    vafN <- out$vafN
-    het <- out$het
-    gcpct <- out$gcpct
+    chrom <- mat$chrom
+    maploc <- mat$maploc
+    rCountN <- mat$rCountN
+    rCountT <- mat$rCountT
+    vafT <- mat$vafT
+    vafN <- mat$vafN
+    het <- mat$het
+    gcpct <- mat$gcpct
     # compute gc bias
     ncount <- tapply(rCountN, gcpct, sum)
     tcount <- tapply(rCountT, gcpct, sum)
@@ -98,10 +98,10 @@ counts2logROR <- function(mat, gbuild, unmatched=FALSE, ugcpct=NULL, f=0.2) {
         lorvar[het==1] <- (1/(rcmat[,1]+0.5) + 1/(rcmat[,2]+0.5) + 1/(rcmat[,3]+0.5) + 1/(rcmat[,4]+0.5))
     }
     # put them together
-    out$lorvar <- out$valor <- out$cnlr <- out$gcbias <- rep(NA_real_, nrow(out))
-    out$gcbias <- gcbias
-    out$cnlr <- cnlr
-    out$valor <- valor
-    out$lorvar <- lorvar
-    out
+    mat$lorvar <- mat$valor <- mat$cnlr <- mat$gcbias <- rep(NA_real_, nrow(mat))
+    mat$gcbias <- gcbias
+    mat$cnlr <- cnlr
+    mat$valor <- valor
+    mat$lorvar <- lorvar
+    mat
 }
diff --git a/R/facets-segment.R b/R/facets-segment.R
index b864a19..ae1a8b1 100644
--- a/R/facets-segment.R
+++ b/R/facets-segment.R
@@ -117,7 +117,7 @@ prune.cpt.tree <- function(seg.tree, cval=25) {
 }
 
 # segment by looping over the chromosomes
-segsnps <- function(mat, cval=25, hetscale=FALSE, delta=0) {
+segsnps <- function(mat, cval=25, hetscale=FALSE, delta=0, nX=23) {
     # keep the original data
     mat0 <- mat
     # keep only rows that have finite values for cnlr
@@ -137,7 +137,7 @@ segsnps <- function(mat, cval=25, hetscale=FALSE, delta=0) {
     # initialize segment indicator
     mat$seg <- rep(NA_real_, nrow(mat))
     # loop over chromosomes
-    nchr <- max(mat$chrom) # IMPACT doesn't have X so only 22
+    nchr <- nX - 1 # IMPACT doesn't have X so only 22
     # possible chromosomes
     chrs <- 1:nchr
     # initialize segmentation tree
@@ -169,15 +169,19 @@ segsnps <- function(mat, cval=25, hetscale=FALSE, delta=0) {
 jointsegsummary <- function(jointseg) {
     # remove snps with NA in segs (due to NA in cnlr)
     jointseg <- jointseg[is.finite(jointseg$seg),]
+    
     # initialize output table
     nsegs <- max(jointseg$seg)
+    
     # segment start and end indices and number of loci
-    seg.start <- which(diff(c(0,jointseg$seg))==1)
+    seg.start <- which(diff(c(0, jointseg$seg)) == 1)
     seg.end <- c(seg.start[-1]-1, nrow(jointseg))
     num.mark <- seg.end - seg.start + 1
+    
     # initialize the output
     out <- as.data.frame(matrix(0, nsegs, 6))
     names(out) <- c("chrom","seg","num.mark","nhet","cnlr.median","mafR")
+    
     # function to estimate maf from valor and lorvar
     maffun <- function(x) {
         # occasional extreme valor can screw maf. so winsorize the maf
@@ -191,8 +195,11 @@ jointsegsummary <- function(jointseg) {
         valor[valor < valor.thresh] <- valor.thresh
         sum(((valor)^2 - lorvar)/lorvar)/sum(1/lorvar)
     }
+
     # loop over the segments
     for (seg in 1:nsegs) {
+        if (is.na(seg.start[seg]) && is.na(seg.end[seg]))
+            break
         zhet <- jointseg$het[seg.start[seg]:seg.end[seg]]
         out[seg, 1] <- jointseg$chrom[seg.start[seg]]
         out[seg, 2] <- seg
@@ -202,6 +209,7 @@ jointsegsummary <- function(jointseg) {
         if (out[seg, 4] > 0) {
             zvalor <- jointseg$valor[seg.start[seg]:seg.end[seg]]
             zlorvar <- jointseg$lorvar[seg.start[seg]:seg.end[seg]]
+
             zz1 <- data.frame(valor=zvalor[zhet == 1], lorvar=zlorvar[zhet == 1])
             out[seg, 6] <- maffun(zz1)
         }
diff --git a/R/facets-wrapper.R b/R/facets-wrapper.R
index 61b8d30..d162256 100644
--- a/R/facets-wrapper.R
+++ b/R/facets-wrapper.R
@@ -10,8 +10,8 @@ readSnpMatrix <- function(filename, skip=0L, err.thresh=Inf, del.thresh=Inf, per
                                   col_types=cols(
                                       Chromosome = col_character(),
                                       Position = col_integer(),
-                                      Ref = col_character(),
-                                      Alt = col_character(),
+                                      Ref = col_skip(),
+                                      Alt = col_skip(),
                                       File1R = col_integer(),
                                       File1A = col_integer(),
                                       File1E = col_integer(),
@@ -28,15 +28,17 @@ readSnpMatrix <- function(filename, skip=0L, err.thresh=Inf, del.thresh=Inf, per
         if (grepl("chr",pileup$Chromosome[1])) {
             pileup$Chromosome <- gsub("chr", "", pileup$Chromosome)
         }
-        # remove loci where errors and deletions exceeded thresholds
-        ii <- which(pileup$File1E <= err.thresh & pileup$File1D <= del.thresh & pileup$File2E <= err.thresh & pileup$File2D <= del.thresh)
-        rcmat <- pileup[ii, 1:2]
-        rcmat$NOR.DP <- pileup$File1R[ii] + pileup$File1A[ii]
-        rcmat$NOR.RD <- pileup$File1R[ii]
-        rcmat$TUM.DP <- pileup$File2R[ii] + pileup$File2A[ii]
-        rcmat$TUM.RD <- pileup$File2R[ii]
+
+        rcmat <- pileup %>%
+            dplyr::filter(pileup$File1E <= err.thresh & pileup$File1D <= del.thresh & pileup$File2E <= err.thresh & pileup$File2D <= del.thresh) %>%
+            tibble::add_column(NOR.DP = pileup$File1R + pileup$File1A) %>%
+            dplyr::rename(NOR.RD = File1R) %>%
+            tibble::add_column(TUM.DP = pileup$File2R + pileup$File2A) %>%
+            dplyr::rename(TUM.RD = File2R) %>%
+            dplyr::select(-contains("File"))
     }
-    rcmat
+    
+    as.data.frame(rcmat)
 }
 
 preProcSample <- function(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, cval=25, deltaCN=0, gbuild=c("hg19", "hg38", "hg18", "mm9", "mm10", "udef"), ugcpct=NULL, hetscale=TRUE, unmatched=FALSE, ndepthmax=1000) {
@@ -51,33 +53,42 @@ preProcSample <- function(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, cval=
             nX <- length(ugcpct)
         }
     }
-    pmat <- procSnps(rcmat, ndepth, het.thresh, snp.nbhd, nX, unmatched, ndepthmax)
+
+    rcmat <- procSnps(rcmat, ndepth, het.thresh, snp.nbhd, nX, unmatched, ndepthmax)
+    
     if (gbuild == "udef") {
-        dmat <- counts2logROR(pmat[pmat$rCountT>0,], gbuild, unmatched, ugcpct)
+        dmat <- counts2logROR(rcmat, gbuild, unmatched, ugcpct, nX=nX)
     } else {
-        dmat <- counts2logROR(pmat[pmat$rCountT>0,], gbuild, unmatched)
+        dmat <- counts2logROR(rcmat, gbuild, unmatched, nX=nX)
     }
+    
     tmp <- segsnps(dmat, cval, hetscale, deltaCN)
-    out <- list(pmat=pmat, gbuild=gbuild, nX=nX)
+    out <- list(pmat=rcmat, gbuild=gbuild, nX=nX)
     c(out, tmp)
 }
 
 procSample <- function(x, cval=150, min.nhet=15, dipLogR=NULL) {
     # ensure availability of seg.tree
     if (is.null(x$seg.tree)) stop("seg.tree is not available")
+
     # get the numeric value of chromosome X
     nX <- x$nX
+
     # make sure that original cval is smaller than current one
     cval.fit <- attr(x$seg.tree, "cval")
     if (cval.fit > cval) stop("original fit used cval = ", cval.fit)
+
     # jointseg etc
     jseg <- x$jointseg
     jseg <- jseg[is.finite(jseg$cnlr),]
+    
     # chromosomes with data and their counts
     chrs <- x$chromlevels
     nchr <- length(chrs)
+    
     # get chromlevels from chrs
     chromlevels <- c(1:(nX-1), "X")[chrs]
+
     # get the segment summary for the fit in seg.tree
     nsegs <- 0
     # jointseg already has a seg variable numbered 1 thru number of segments for each chromosome
@@ -86,13 +97,17 @@ procSample <- function(x, cval=150, min.nhet=15, dipLogR=NULL) {
         nsegs <- max(jseg$seg[jseg$chrom==chrs[i]])
     }
     focalout <- jointsegsummary(jseg)
+    
     # cnlr.median to the left and right
     cnlr.med.l <- c(0, focalout$cnlr.median[-nsegs])
     cnlr.med.r <- c(focalout$cnlr.median[-1], 0)
+    
     # mad of cnlr noise
     cnlr.mad <- mad(jseg$cnlr - rep(focalout$cnlr.median, focalout$num.mark))
+    
     # segments that show focal changes have big jump in cnlr.median
     focalout$focal <- 1*(focalout$cnlr.median > pmax(cnlr.med.l, cnlr.med.r)+3*cnlr.mad) + 1*(focalout$cnlr.median < pmin(cnlr.med.l, cnlr.med.r)-3*cnlr.mad)
+    
     # get the segments for the specified cval 
     nsegs <- 0
     for (i in 1:nchr) {
@@ -100,14 +115,19 @@ procSample <- function(x, cval=150, min.nhet=15, dipLogR=NULL) {
         jseg$seg[jseg$chrom==chrs[i]] <- nsegs + rep(1:length(seg.widths), seg.widths)
         nsegs <- nsegs + length(seg.widths)
     }
+    
     # adding the focal change segments - need a jump at the beginning and end
     jseg$seg0 <- jseg$seg # detected segments
+    
     # jump at the beginning (twice the height)
     jseg$seg <- jseg$seg + rep(cumsum(2*focalout$focal), focalout$num.mark)
+    
     # drop back for the focal segment to get the steps right
     jseg$seg <- jseg$seg - rep(focalout$focal, focalout$num.mark)
+    
     # focal segment could already be in; so change seg indicator
     jseg$seg <- cumsum(c(1, 1*(diff(jseg$seg) > 0)))
+    
     # segment summaries
     out <- jointsegsummary(jseg)
     # cluster the segments
@@ -128,18 +148,23 @@ procSample <- function(x, cval=150, min.nhet=15, dipLogR=NULL) {
 
 plotSample <- function(x, emfit=NULL, clustered=FALSE, plot.type=c("em","naive","both","none"), sname=NULL) {
     def.par <- par(no.readonly = TRUE) # save default, for resetting...
+    
     # plot.type
     plot.type <- match.arg(plot.type)
+
     # layout of multi panel figure
     if (plot.type=="none") layout(matrix(1:2, ncol=1))
     if (plot.type=="em") layout(matrix(rep(1:4, c(9,9,6,1)), ncol=1))
     if (plot.type=="naive") layout(matrix(rep(1:4, c(9,9,6,1)), ncol=1))
     if (plot.type=="both") layout(matrix(rep(1:6, c(9,9,6,1,6,1)), ncol=1))
     par(mar=c(0.25,3,0.25,1), mgp=c(1.75, 0.6, 0), oma=c(3,0,1.25,0))
+
     # raw data used for joint segmentation
     jseg <- x$jointseg
+
     # chromosome boundaries
-    chrbdry <- which(diff(jseg$chrom) != 0)
+    chrbdry <- which(diff(as.numeric(jseg$chr[jseg$chr != "X"])) != 0)
+
     if (missing(emfit)) {
         out <- x$out
         if (plot.type=="em" | plot.type=="both") {
@@ -153,6 +178,11 @@ plotSample <- function(x, emfit=NULL, clustered=FALSE, plot.type=c("em","naive",
         out$lcn <- x$out$lcn
         out$cf <- x$out$cf
     }
+    
+    # Filter out chr X
+    out <- out[out$chr != 'X',]
+    out$chr = as.numeric(out$chr)
+    
     # determine which of the cnlr.median & mafR to show
     if (clustered) {
         cnlr.median <- out$cnlr.median.clust
@@ -163,23 +193,27 @@ plotSample <- function(x, emfit=NULL, clustered=FALSE, plot.type=c("em","naive",
         mafR <- out$mafR
     }
     mafR <- abs(mafR)
+
     # chromosome colors
     chrcol <- 1+rep(out$chr-2*floor(out$chr/2), out$num.mark)
     nn <- cumsum(table(jseg$chrom[is.finite(jseg$cnlr)]))
     segbdry <- cumsum(c(0,out$num.mark))
     segstart <- segbdry[-length(segbdry)]
     segend <- segbdry[-1]
+
     # plot the logR data and segment medians
     plot(jseg$cnlr[is.finite(jseg$cnlr)], pch=".", cex=2, col = c("grey","lightblue","azure4","slateblue")[chrcol], ylab="log-ratio", xaxt="n")
     abline(v=chrbdry, lwd=0.25)
     abline(h=median(jseg$cnlr, na.rm=TRUE), col="green2")
     abline(h = x$dipLogR, col = "magenta4")
     segments(segstart, cnlr.median, segend, cnlr.median, lwd=1.75, col=2)
+
     # plot the logOR data and mafR
     plot(jseg$valor[is.finite(jseg$cnlr)], pch=".", cex=2.5, col = c("grey","lightblue","azure4","slateblue")[chrcol], ylab="log-odds-ratio", ylim=c(-4,4), xaxt="n")
     abline(v=chrbdry, lwd=0.25)
     segments(segstart, sqrt(mafR), segend, sqrt(mafR), lwd=1.75, col=2)
     segments(segstart, -sqrt(mafR), segend, -sqrt(mafR), lwd=1.75, col=2)
+
     # naive copy number and cellular faction pieces
     cfpalette <- c(colorRampPalette(c("white", "steelblue"))(10),"bisque2")
     if (plot.type=="naive" | plot.type=="both") {
@@ -197,6 +231,7 @@ plotSample <- function(x, emfit=NULL, clustered=FALSE, plot.type=c("em","naive",
         cfcol <- cfpalette[round(10*out$cf+0.501)]
         rect(segstart, 0, segend, 1, col=cfcol, border=NA)
     }
+
     # EM copy number and cellular faction pieces
     if (plot.type=="em" | plot.type=="both") {
         # plot the estimated copy numbers and cf
@@ -216,10 +251,12 @@ plotSample <- function(x, emfit=NULL, clustered=FALSE, plot.type=c("em","naive",
     
     # now add the chromosome ticks on x-axis
     chromlevels <- x$chromlevels
+
     # just make sure chromlevels actually exists
     if (is.null(chromlevels)) chromlevels <- 1:length(nn)
     axis(labels=chromlevels, side=1, at=(nn+c(0,nn[-length(nn)]))/2, cex=0.65)
     mtext(side=1, line=1.75, "Chromosome", cex=0.8)
+
     if (!missing(sname)) mtext(sname, side=3, line=0, outer=TRUE, cex=0.8)
     par(def.par)  #- reset to default
 }
@@ -228,11 +265,14 @@ logRlogORspider <- function(cncf, dipLogR=0, nfrac=0.005) {
     rho <- seq(0, 0.95, by=0.01)
     nrho <- length(rho)
     logACR <- logCNR <- matrix(0, nrho, 19)
+    
     # initialize index
     l <- 1
+
     # one copy loss
     logCNR[,l] <- log2(2*(1-rho) + 1*rho) -1
     logACR[,l] <- log(1/(1-rho))
+
     # integer copy numbers (clonal)
     for(i in 2:7) {
         for(j in 0:floor(i/2)) {
@@ -260,6 +300,6 @@ logRlogORspider <- function(cncf, dipLogR=0, nfrac=0.005) {
     ii <- cncf$num.mark > nfrac*nsnps & cncf$nhet > nfrac*nhets
     cex <- 0.3 + 2.7*(cncf$num.mark[ii]/sum(0.1*cncf$num.mark[ii]))
     chrcol <- rainbow(24)
-    points(cncf$cnlr.median[ii] - dipLogR, sqrt(abs(cncf$mafR[ii])), cex=cex, pch=10, col=chrcol[cncf$chrom[ii]], lwd=1.5)
+    points(cncf$cnlr.median[ii] - dipLogR, sqrt(abs(cncf$mafR[ii])), cex=cex, pch=10, col=chrcol[as.numeric(cncf$chrom[ii])], lwd=1.5)
     legend(-1, 5.25, paste("chr", c(1:22, "X"), sep=""), ncol=4, pch=10, col=chrcol[1:23], cex=0.65)
 }