diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh
index e85469e5b..2da4a5037 100755
--- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh
+++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh
@@ -379,3 +379,32 @@ then
 else
     echo "Already installed"
 fi
+
+#
+#clustalw
+#
+
+echo ""
+echo ""
+echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
+echo "Installing ClustalW"
+echo ""
+cd $LKSRC_DIR
+
+if [[ ! -e ${LKTOOLS_DIR}/clustalw2 || ! -z $FORCE_REINSTALL ]];
+then
+    rm -Rf clustal*
+    rm -Rf 1.2.4-cmake.tar.gz
+    rm -Rf $LKTOOLS_DIR/clustalw2
+
+    wget $WGET_OPTS https://github.com/GSLBiotech/clustal-omega/archive/refs/tags/1.2.4-cmake.tar.gz
+    tar -xf 1.2.4-cmake.tar.gz
+    cd clustal-omega-1.2.4-cmake
+    ./configure
+    make
+
+    install ./src/clustalw2 $LKTOOLS_DIR/clustalw2
+
+else
+    echo "Already installed"
+fi
diff --git a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh
index 3e31985f4..9f6442e56 100755
--- a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh
+++ b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh
@@ -999,37 +999,6 @@ else
 fi
 
-
-#
-#clustalw
-#
-
-echo ""
-echo ""
-echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
-echo "Installing ClustalW"
-echo ""
-cd $LKSRC_DIR
-
-if [[ ! -e ${LKTOOLS_DIR}/clustalw2 || ! -z $FORCE_REINSTALL ]];
-then
-    rm -Rf clustalw-*
-    rm -Rf $LKTOOLS_DIR/clustalw2
-
-    wget $WGET_OPTS http://www.clustal.org/download/current/clustalw-2.1.tar.gz
-    gunzip clustalw-2.1.tar.gz
-    tar -xf clustalw-2.1.tar
-    gzip clustalw-2.1.tar
-    cd clustalw-2.1
-    ./configure
-    make
-
-    install ./src/clustalw2 $LKTOOLS_DIR/clustalw2
-
-else
-    echo "Already installed"
-fi
-
-
 #
 #muscle
 #
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java
index 47c9041ed..930f93602 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java
@@ -5069,6 +5069,7 @@ public ApiResponse execute(ArchiveReadsetsForm form, BindException errors) throw
                 Set toDelete = new HashSet<>();
                 List> toUpdate = new ArrayList<>();
 
+                Set encounteredReaddata = new HashSet<>();
                 for (ReadData rd : rs.getReadData())
                 {
                     if (rd.getSra_accession() == null)
@@ -5077,7 +5078,14 @@ public ApiResponse execute(ArchiveReadsetsForm form, BindException errors) throw
                         return null;
                     }
 
+                    // A given ReadData can be referenced by multiple readsets
+                    if (encounteredReaddata.contains(rd.getRowid()))
+                    {
+                        continue;
+                    }
+
                     toUpdate.add(new CaseInsensitiveHashMap<>(Map.of("rowid", rd.getRowid(), "archived", true, "container", rd.getContainer())));
+                    encounteredReaddata.add(rd.getRowid());
 
                     // File 1:
                     ExpData d1 = ExperimentService.get().getExpData(rd.getFileId1());
@@ -5131,7 +5139,6 @@ public ApiResponse execute(ArchiveReadsetsForm form, BindException errors) throw
                 {
                     List> keys = new ArrayList<>();
                     toUpdate.forEach(row -> {
-
                         keys.add(new CaseInsensitiveHashMap<>(Map.of("rowid", row.get("rowid"))));
                     });
 
@@ -5141,7 +5148,7 @@ public ApiResponse execute(ArchiveReadsetsForm form, BindException errors) throw
                 }
                 catch (Exception e)
                 {
-                    _log.error(e);
+                    _log.error("Error archiving readsets", e);
                     errors.reject(ERROR_MSG, "Error archiving readset: " + readsetId + ", " + e.getMessage());
 
                     return null;
                 }
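One behavioral note on the controller hunk above: `_log.error(e)` binds to Log4j's single-argument error(Object message) overload, which typically records only the exception's toString() and drops the stack trace, while error(String, Throwable) logs both. A minimal standalone sketch of the difference (generic Log4j usage, not code from this module):

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

public class LoggingExample
{
    private static final Logger _log = LogManager.getLogger(LoggingExample.class);

    public static void main(String[] args)
    {
        Exception e = new IllegalStateException("boom");

        // Treated as error(Object message): usually just "java.lang.IllegalStateException: boom"
        _log.error(e);

        // error(String message, Throwable t): message plus the full stack trace
        _log.error("Error archiving readsets", e);
    }
}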
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJobSupportImpl.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJobSupportImpl.java
index d7eb2ea8c..ed7e7a126 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJobSupportImpl.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJobSupportImpl.java
@@ -320,6 +320,11 @@ public void testSerializeWithMap() throws Exception
             js1._cachedObjects.put("cachedString", "foo");
             js1._cachedObjects.put("cachedLong", 2L);
 
+            LongHashMap longMap = new LongHashMap<>();
+            longMap.put(1L, 2L);
+
+            js1._cachedObjects.put("cachedLongMap", longMap);
+
             ObjectMapper mapper = PipelineJob.createObjectMapper();
 
             StringWriter writer = new StringWriter();
@@ -341,10 +346,13 @@
             //NOTE: this is not serializing properly. the keys are serialized as Strings
             Map serializedMap = deserialized.getCachedObject("cachedMap", mapper.getTypeFactory().constructParametricType(Map.class, Integer.class, Integer.class));
             assertEquals("Map not serialized properly", 1, serializedMap.size());
-
-            //TODO: determine if we can coax jackson into serializing these properly
             assertEquals("Object not serialized with correct key type", Integer.class, serializedMap.keySet().iterator().next().getClass());
             assertNotNull("Map keys not serialized properly", serializedMap.get(1));
+
+            LongHashMap serializedLongMap = (LongHashMap)deserialized.getCachedObject("cachedLongMap", LongHashMap.class);
+            assertEquals("LongMap not serialized properly", 1, serializedLongMap.size());
+            assertEquals("Object not serialized with correct key type", Long.class, serializedLongMap.keySet().iterator().next().getClass());
+            assertNotNull("LongMap keys not serialized properly", serializedLongMap.get(1L));
         }
 
         @Test
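Background on why the test above adds a dedicated LongHashMap cache entry: JSON object keys are always strings, so a Map with numeric keys loses its key type when it round-trips through Jackson inside an untyped cached-object map, unless the reader is given an explicit target type. A small self-contained sketch of that behavior with plain jackson-databind (LongHashMap itself is a class internal to this codebase and is not shown here):

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.type.TypeFactory;

import java.util.Map;

public class MapKeyRoundTrip
{
    public static void main(String[] args) throws Exception
    {
        ObjectMapper mapper = new ObjectMapper();

        // JSON object keys are always strings, so the Integer key 1 is written as "1"
        String json = mapper.writeValueAsString(Map.of(1, 2));  // {"1":2}

        // Read back without a target type: the key comes back as a String
        Map<?, ?> untyped = mapper.readValue(json, Map.class);
        System.out.println(untyped.keySet().iterator().next().getClass());  // class java.lang.String

        // Read back with an explicit MapType: Jackson's key deserializer restores Integer keys
        Map<Integer, Integer> typed = mapper.readValue(json,
                TypeFactory.defaultInstance().constructMapType(Map.class, Integer.class, Integer.class));
        System.out.println(typed.keySet().iterator().next().getClass());  // class java.lang.Integer
    }
}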
diff --git a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java
index b2adf25a0..45d161346 100644
--- a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java
+++ b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java
@@ -318,14 +318,19 @@ public static void executeR(SequenceOutputHandler.JobContext ctx, String dockerC
             throw new PipelineJobException(e);
         }
 
-        if (seuratThreads != null)
+        Integer maxThreads = SequencePipelineService.get().getMaxThreads(ctx.getLogger());
+        if (seuratThreads == null && maxThreads != null)
         {
-            Integer maxThreads = SequencePipelineService.get().getMaxThreads(ctx.getLogger());
-            if (maxThreads != null && maxThreads < seuratThreads)
-            {
-                seuratThreads = maxThreads;
-            }
+            seuratThreads = maxThreads;
+        }
+        else if (seuratThreads != null && maxThreads != null && maxThreads < seuratThreads)
+        {
+            ctx.getLogger().debug("Lowering SEURAT_MAX_THREADS based on the job settings, to: " + maxThreads);
+            seuratThreads = maxThreads;
+        }
 
+        if (seuratThreads != null)
+        {
             wrapper.addToDockerEnvironment("SEURAT_MAX_THREADS", seuratThreads.toString());
         }
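The rewritten block above changes how SEURAT_MAX_THREADS is resolved: when the step does not request a thread count it now falls back to the pipeline-wide maximum, and when it does, the request is still capped by that maximum. A standalone sketch of that resolution rule (hypothetical helper, not part of the LabKey API):

public class ThreadResolution
{
    // Resolve the thread count exported to the container:
    // - if the step did not request a count, use the pipeline-wide maximum (when known)
    // - if both are known, never exceed the pipeline-wide maximum
    // - may still return null when neither value is available
    static Integer resolveSeuratThreads(Integer requested, Integer maxThreads)
    {
        if (requested == null)
        {
            return maxThreads;
        }

        if (maxThreads != null && maxThreads < requested)
        {
            return maxThreads;
        }

        return requested;
    }

    public static void main(String[] args)
    {
        System.out.println(resolveSeuratThreads(null, 8));    // 8
        System.out.println(resolveSeuratThreads(16, 8));      // 8
        System.out.println(resolveSeuratThreads(4, 8));       // 4
        System.out.println(resolveSeuratThreads(null, null)); // null
    }
}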
diff --git a/singlecell/resources/chunks/ApplyKnownClonotypicData.R b/singlecell/resources/chunks/ApplyKnownClonotypicData.R
index d73e4ef1e..4967ef43a 100644
--- a/singlecell/resources/chunks/ApplyKnownClonotypicData.R
+++ b/singlecell/resources/chunks/ApplyKnownClonotypicData.R
@@ -11,7 +11,7 @@ for (datasetId in names(seuratObjects)) {
   printName(datasetId)
   seuratObj <- readSeuratRDS(seuratObjects[[datasetId]])
 
-  seuratObj <- ApplyKnownClonotypicData(seuratObj)
+  seuratObj <- ApplyKnownClonotypicData(seuratObj, antigenInclusionList = antigenInclusionList, antigenExclusionList = antigenExclusionList, minActivationFrequency = minActivationFrequency)
 
   saveData(seuratObj, datasetId)
 
   # Cleanup
diff --git a/singlecell/resources/chunks/PerformTcrClustering.R b/singlecell/resources/chunks/PerformTcrClustering.R
index 5ada2327a..704273f11 100644
--- a/singlecell/resources/chunks/PerformTcrClustering.R
+++ b/singlecell/resources/chunks/PerformTcrClustering.R
@@ -2,14 +2,16 @@ for (datasetId in names(seuratObjects)) {
   printName(datasetId)
   seuratObj <- readSeuratRDS(seuratObjects[[datasetId]])
 
+  print(paste0('Calculating distances for: ', datasetId))
   seuratObj <- tcrClustR::CalculateTcrDistances(
     inputData = seuratObj,
-    chains = c('TRA', 'TRB', 'TRG', 'TRD'),
+    chains = chains,
     organism = organism,
     minimumCloneSize = 2,
     calculateChainPairs = TRUE
   )
 
+  print('Performing TCR Clustering')
   seuratObj <- tcrClustR::RunTcrClustering(
     seuratObj_TCR = seuratObj,
     dianaHeight = 20,
@@ -22,10 +24,13 @@ for (datasetId in names(seuratObjects)) {
   } else {
     for (an in names(seuratObj@misc$TCR_Distances)) {
       ad <- seuratObj@misc$TCR_Distances[[an]]
-      print(paste0('Assay: ', an, ', total clones: ', nrow(ad)))
+      fn <- length(unique(seuratObj[[paste0(an, '_ClusterIdx')]]))
+      print(paste0('Assay: ', an, ', total clones: ', nrow(ad), '. Distinct families: ', fn))
     }
   }
 
+  VisualizeTcrDistances(seuratObj)
+
   saveData(seuratObj, datasetId)
 
   # Cleanup
diff --git a/singlecell/resources/chunks/PredictTcellActivation.R b/singlecell/resources/chunks/PredictTcellActivation.R
index 7b18f1922..c5374cc76 100644
--- a/singlecell/resources/chunks/PredictTcellActivation.R
+++ b/singlecell/resources/chunks/PredictTcellActivation.R
@@ -2,6 +2,14 @@ for (datasetId in names(seuratObjects)) {
   printName(datasetId)
   seuratObj <- readSeuratRDS(seuratObjects[[datasetId]])
 
+  toDrop <- grep(names(seuratObj@meta.data), pattern = "sPLS", value = TRUE)
+  if (length(toDrop) > 0) {
+    print(paste0('Dropping pre-existing columns: ', paste0(toDrop, collapse = ', ')))
+    for (colName in toDrop) {
+      seuratObj[[colName]] <- NULL
+    }
+  }
+
   seuratObj <- RIRA::PredictTcellActivation(seuratObj)
 
   saveData(seuratObj, datasetId)
diff --git a/singlecell/resources/chunks/RunDecoupler.R b/singlecell/resources/chunks/RunDecoupler.R
index 6e4656164..b052d69b0 100644
--- a/singlecell/resources/chunks/RunDecoupler.R
+++ b/singlecell/resources/chunks/RunDecoupler.R
@@ -3,6 +3,11 @@ for (datasetId in names(seuratObjects)) {
   seuratObj <- readSeuratRDS(seuratObjects[[datasetId]])
 
   seuratObj <- CellMembrane::RunDecoupleR(seuratObj)
+  if (!all(is.na(heatmapGroupingVars))) {
+    for (heatmapGroupingVar in heatmapGroupingVars) {
+      PlotTfData(seuratObj, groupField = heatmapGroupingVar)
+    }
+  }
 
   saveData(seuratObj, datasetId)
diff --git a/singlecell/resources/chunks/RunEscape.R b/singlecell/resources/chunks/RunEscape.R
index 960bbe631..e9aa951fb 100644
--- a/singlecell/resources/chunks/RunEscape.R
+++ b/singlecell/resources/chunks/RunEscape.R
@@ -5,14 +5,41 @@ if (Sys.getenv('SEURAT_MAX_THREADS') != '') {
 }
 
 for (datasetId in names(seuratObjects)) {
-  printName(datasetId)
-  seuratObj <- readSeuratRDS(seuratObjects[[datasetId]])
+  printName(datasetId)
+  seuratObj <- readSeuratRDS(seuratObjects[[datasetId]])
 
-  seuratObj <- CellMembrane::RunEscape(seuratObj, outputAssayBaseName = outputAssayBaseName, doPlot = TRUE, performDimRedux = performDimRedux, nCores = nCores)
+  toDelete <- c()
 
-  saveData(seuratObj, datasetId)
+  vals <- eval(formals(CellMembrane::RunEscape)$msigdbGeneSets)
+  for (idx in seq_along(vals)) {
+    geneSetName <- names(vals)[idx]
+    geneSet <- vals[[idx]]
+    logger::log_info(paste0('Processing: ', geneSetName, ' / ', geneSet))
 
-  # Cleanup
-  rm(seuratObj)
-  gc()
+    fn <- paste0('escape.', datasetId, '.', ifelse(geneSetName == '', yes = geneSet, no = geneSetName), '.rds')
+    if (file.exists(fn)) {
+      logger::log_info(paste0('resuming: ', fn))
+      seuratObj <- readRDS(fn)
+      toDelete <- c(toDelete, fn)
+    } else {
+      msigdbGeneSets <- geneSet
+      if (geneSetName != '') {
+        names(msigdbGeneSets) <- geneSetName
+      }
+
+      seuratObj <- CellMembrane::RunEscape(seuratObj, msigdbGeneSets = msigdbGeneSets, outputAssayBaseName = outputAssayBaseName, doPlot = TRUE, heatmapGroupingVars = heatmapGroupingVars, performDimRedux = performDimRedux, escapeMethod = escapeMethod, nCores = nCores)
+      saveRDS(seuratObj, file = fn)
+      toDelete <- c(toDelete, fn)
+    }
+  }
+
+  for (fn in toDelete) {
+    unlink(fn)
+  }
+
+  saveData(seuratObj, datasetId)
+
+  # Cleanup
+  rm(seuratObj)
+  gc()
 }
\ No newline at end of file
diff --git a/singlecell/resources/chunks/UpdateSeuratPrototype.R b/singlecell/resources/chunks/UpdateSeuratPrototype.R
index f5b41a24b..4d7f9e13e 100644
--- a/singlecell/resources/chunks/UpdateSeuratPrototype.R
+++ b/singlecell/resources/chunks/UpdateSeuratPrototype.R
@@ -47,11 +47,20 @@ for (datasetId in names(seuratObjects)) {
   }
 
   if (scoreActivation) {
+    # Drop existing columns:
+    toDrop <- grep(names(seuratObj@meta.data), pattern = "sPLS", value = TRUE)
+    if (length(toDrop) > 0) {
+      print(paste0('Dropping pre-existing columns: ', paste0(toDrop, collapse = ', ')))
+      for (colName in toDrop) {
+        seuratObj[[colName]] <- NULL
+      }
+    }
+
     seuratObj <- RIRA::PredictTcellActivation(seuratObj)
   }
 
   if (recalculateUCells) {
-    seuratObj <- RIRA::CalculateUCellScores(seuratObj, storeRanks = FALSE, assayName = 'RNA', forceRecalculate = TRUE, ncores = nCores)
+    seuratObj <- RIRA::CalculateUCellScores(seuratObj, storeRanks = FALSE, assayName = 'RNA', forceRecalculate = TRUE, ncores = nCores, dropAllExistingUcells = TRUE)
   }
 
   saveData(seuratObj, datasetId)
diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java
index f40b0940b..6cb814923 100644
--- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java
+++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java
@@ -48,6 +48,7 @@
 import org.labkey.api.singlecell.model.Sample;
 import org.labkey.api.singlecell.model.Sort;
 import org.labkey.api.singlecell.pipeline.SeuratToolParameter;
+import org.labkey.api.util.FileUtil;
 import org.labkey.api.util.PageFlowUtil;
 import org.labkey.api.writer.PrintWriters;
 import org.labkey.singlecell.run.CellRangerFeatureBarcodeHandler;
@@ -518,12 +519,12 @@ else if (genomeIds.size() > 1)
 
     public File getValidCiteSeqBarcodeFile(File sourceDir, long gexReadsetId)
    {
-        return new File(sourceDir, "validADTS." + gexReadsetId + ".csv");
+        return FileUtil.appendName(sourceDir, "validADTS." + gexReadsetId + ".csv");
     }
 
     public File getValidCiteSeqBarcodeMetadataFile(File sourceDir, long gexReadsetId)
     {
-        return new File(sourceDir, "validADTS." + gexReadsetId + ".metadata.txt");
+        return FileUtil.appendName(sourceDir, "validADTS." + gexReadsetId + ".metadata.txt");
     }
 
     private void writeCiteSeqBarcodes(PipelineJob job, Map> gexToPanels, File outputDir) throws PipelineJobException
@@ -585,7 +586,7 @@ private void writeCiteSeqBarcodes(PipelineJob job, Map> gexToP
 
     public File getValidHashingBarcodeFile(File sourceDir)
     {
-        return new File(sourceDir, "validHashingBarcodes.csv");
+        return FileUtil.appendName(sourceDir, "validHashingBarcodes.csv");
     }
 
     @Override
@@ -597,7 +598,7 @@ public File generateHashingCallsForRawMatrix(Readset parentReadset, PipelineOutp
         }
         parameters.validate(true);
 
-        Map readsetToHashing = getCachedHashingReadsetMap(ctx.getSequenceSupport());
+        Map readsetToHashing = getCachedHashingReadsetMap(ctx.getSequenceSupport());
         if (readsetToHashing.isEmpty())
         {
             ctx.getLogger().info("No cached " + parameters.type.name() + " readsets, skipping");
@@ -621,7 +622,7 @@ public File generateHashingCallsForRawMatrix(Readset parentReadset, PipelineOutp
         ctx.getLogger().debug("total cached readset/" + parameters.type.name() + " readset pairs: " + readsetToHashing.size());
         ctx.getLogger().debug("unique indexes: " + lineCount);
 
-        Readset htoReadset = ctx.getSequenceSupport().getCachedReadset(readsetToHashing.get(parentReadset.getReadsetId()));
+        Readset htoReadset = ctx.getSequenceSupport().getCachedReadset((long)readsetToHashing.get(parentReadset.getReadsetId()));
         if (htoReadset == null)
         {
             throw new PipelineJobException("Unable to find HTO readset for readset: " + parentReadset.getRowId());
@@ -723,15 +724,15 @@ public File generateHashingCallsForRawMatrix(Readset parentReadset, PipelineOutp
         return callsFile;
     }
 
-    private Map getCachedCiteSeqReadsetMap(SequenceAnalysisJobSupport support) throws PipelineJobException
+    private Map getCachedCiteSeqReadsetMap(SequenceAnalysisJobSupport support) throws PipelineJobException
     {
-        return support.getCachedObject(READSET_TO_CITESEQ_MAP, PipelineJob.createObjectMapper().getTypeFactory().constructParametricType(Map.class, Integer.class, Integer.class));
+        return support.getCachedObject(READSET_TO_CITESEQ_MAP, LongHashMap.class);
     }
 
     @Override
     public boolean usesCellHashing(SequenceAnalysisJobSupport support, File sourceDir) throws PipelineJobException
     {
-        Map gexToHashingMap = getCachedHashingReadsetMap(support);
+        Map gexToHashingMap = getCachedHashingReadsetMap(support);
         if (gexToHashingMap == null || gexToHashingMap.isEmpty())
             return false;
 
@@ -747,7 +748,7 @@ public boolean usesCellHashing(SequenceAnalysisJobSupport support, File sourceDi
     @Override
     public boolean usesCiteSeq(SequenceAnalysisJobSupport support, List inputFiles) throws PipelineJobException
     {
-        Map gexToCiteMap = getCachedCiteSeqReadsetMap(support);
+        Map gexToCiteMap = getCachedCiteSeqReadsetMap(support);
         if (gexToCiteMap == null || gexToCiteMap.isEmpty())
             return false;
 
@@ -784,7 +785,7 @@ public File getH5FileForGexReadset(SequenceAnalysisJobSupport support, long read
             throw new PipelineJobException("Unable to find loupe file: " + loupe.getPath());
         }
 
-        File h5 = new File(loupe.getParentFile(), "raw_feature_bc_matrix.h5");
+        File h5 = FileUtil.appendName(loupe.getParentFile(), "raw_feature_bc_matrix.h5");
         if (!h5.exists())
         {
             throw new PipelineJobException("Unable to find h5 file: " + h5.getPath());
@@ -796,12 +797,12 @@
     @Override
     public File getCDNAInfoFile(File sourceDir)
     {
-        return new File(sourceDir, "cDNAInfo.txt");
+        return FileUtil.appendName(sourceDir, "cDNAInfo.txt");
     }
 
-    public Map getCachedHashingReadsetMap(SequenceAnalysisJobSupport support) throws PipelineJobException
+    public Map getCachedHashingReadsetMap(SequenceAnalysisJobSupport support) throws PipelineJobException
     {
-        return support.getCachedObject(READSET_TO_HASHING_MAP, PipelineJob.createObjectMapper().getTypeFactory().constructParametricType(Map.class, Integer.class, Integer.class));
+        return support.getCachedObject(READSET_TO_HASHING_MAP, LongHashMap.class);
     }
 
     public File getCachedReadsetToCountMatrix(SequenceAnalysisJobSupport support, long readsetId, CellHashingService.BARCODE_TYPE type) throws PipelineJobException
@@ -1033,7 +1034,7 @@ public List getHashingCallingParams(boolean allowMethod
 
     public File getAllHashingBarcodesFile(File webserverDir)
     {
-        return new File(webserverDir, BARCODE_TYPE.hashing.getAllBarcodeFileName());
+        return FileUtil.appendName(webserverDir, BARCODE_TYPE.hashing.getAllBarcodeFileName());
     }
 
     private void writeAllHashingBarcodes(Collection groupNames, User u, Container c, File webserverDir) throws PipelineJobException
@@ -1154,12 +1155,12 @@ else if ("Negative".equals(line[htoClassIdx]))
 
     private File getExpectedCallsFile(File outputDir, String basename)
     {
-        return new File(outputDir, basename + CALL_EXTENSION);
+        return FileUtil.appendName(outputDir, basename + CALL_EXTENSION);
     }
 
     private File getMolInfoFileFromCounts(File citeSeqCountOutDir)
     {
-        return new File(citeSeqCountOutDir.getParentFile(), "molecule_info.h5");
+        return FileUtil.appendName(citeSeqCountOutDir.getParentFile(), "molecule_info.h5");
     }
 
     public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, String basename, Logger log, File localPipelineDir, CellHashingService.CellHashingParameters parameters, PipelineContext ctx) throws PipelineJobException
@@ -1195,11 +1196,11 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St
         File cellBarcodeWhitelistFile = parameters.cellBarcodeWhitelistFile;
         inputFiles.add(cellBarcodeWhitelistFile);
 
-        File htmlFile = new File(outputDir, basename + ".html");
-        File localHtml = new File(localPipelineDir, htmlFile.getName());
+        File htmlFile = FileUtil.appendName(outputDir, basename + ".html");
+        File localHtml = FileUtil.appendName(localPipelineDir, htmlFile.getName());
 
-        File countFile = new File(outputDir, basename + ".rawCounts.rds");
-        File localCounts = new File(localPipelineDir, countFile.getName());
+        File countFile = FileUtil.appendName(outputDir, basename + ".rawCounts.rds");
+        File localCounts = FileUtil.appendName(localPipelineDir, countFile.getName());
 
         // Note: if this job fails and then is resumed, having that pre-existing copy of the HTML can pose a problem
         if (localHtml.exists())
@@ -1219,7 +1220,7 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St
             metricsFile.delete();
         }
 
-        File localRScript = new File(outputDir, "generateCallsWrapper.R");
+        File localRScript = FileUtil.appendName(outputDir, "generateCallsWrapper.R");
         try (PrintWriter writer = PrintWriters.getPrintWriter(localRScript))
         {
             String cellbarcodeWhitelist = cellBarcodeWhitelistFile != null ? "'" + cellBarcodeWhitelistFile.getPath() + "'" : "NULL";
@@ -1369,13 +1370,13 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St
     @Override
     public File getExistingFeatureBarcodeCountDir(Readset parentReadset, BARCODE_TYPE type, SequenceAnalysisJobSupport support) throws PipelineJobException
     {
-        Long childId = type == BARCODE_TYPE.hashing ? getCachedHashingReadsetMap(support).get(parentReadset.getReadsetId()) : getCachedCiteSeqReadsetMap(support).get(parentReadset.getReadsetId());
+        Integer childId = type == BARCODE_TYPE.hashing ? getCachedHashingReadsetMap(support).get(parentReadset.getReadsetId()) : getCachedCiteSeqReadsetMap(support).get(parentReadset.getReadsetId());
         if (childId == null)
         {
             throw new PipelineJobException("Unable to find cached readset of type " + type.name() + " for parent: " + parentReadset.getReadsetId());
         }
 
-        File ret = getCachedReadsetToCountMatrix(support, childId, type);
+        File ret = getCachedReadsetToCountMatrix(support, (long)childId, type);
         if (ret == null)
         {
             throw new PipelineJobException("Unable to find cached count matrix of type " + type.name() + " for parent: " + parentReadset.getReadsetId());
@@ -1394,7 +1395,7 @@ public void copyHtmlLocally(SequenceOutputHandler.JobContext ctx) throws Pipelin
             if (f.getName().endsWith(".hashing.html"))
             {
                 ctx.getLogger().info("Copying hashing HTML locally for debugging: " + f.getName());
-                File target = new File(ctx.getSourceDirectory(), f.getName());
+                File target = FileUtil.appendName(ctx.getSourceDirectory(), f.getName());
                 if (target.exists())
                 {
                     target.delete();
@@ -1420,7 +1421,7 @@ public void copyHtmlLocally(SequenceOutputHandler.JobContext ctx) throws Pipelin
     @Override
     public Set getHtosForParentReadset(Long parentReadsetId, File webserverJobDir, SequenceAnalysisJobSupport support, boolean throwIfNotFound) throws PipelineJobException
     {
-        Long htoReadset = getCachedHashingReadsetMap(support).get(parentReadsetId);
+        Integer htoReadset = getCachedHashingReadsetMap(support).get(parentReadsetId);
         if (htoReadset == null)
         {
             if (throwIfNotFound)
@@ -1433,7 +1434,7 @@ public Set getHtosForParentReadset(Long parentReadsetId, File webserverJ
             }
         }
 
-        return getHtosForReadset(htoReadset, webserverJobDir);
+        return getHtosForReadset((long)htoReadset, webserverJobDir);
     }
 
     public Set getHtosForReadset(Long hashingReadsetId, File webserverJobDir) throws PipelineJobException
@@ -1461,7 +1462,7 @@ public Set getHtosForReadset(Long hashingReadsetId, File webserverJobDir
     public File subsetBarcodes(File allCellBarcodes, @Nullable String barcodePrefix) throws PipelineJobException
     {
         //Subset barcodes by dataset:
-        File output = new File(allCellBarcodes.getParentFile(), "cellBarcodeWhitelist." + (barcodePrefix == null ? "all" : barcodePrefix ) + ".txt");
+        File output = FileUtil.appendName(allCellBarcodes.getParentFile(), "cellBarcodeWhitelist." + (barcodePrefix == null ? "all" : barcodePrefix ) + ".txt");
         try (CSVReader reader = new CSVReader(Readers.getReader(allCellBarcodes), '\t'); CSVWriter writer = new CSVWriter(PrintWriters.getPrintWriter(output), '\t', CSVWriter.NO_QUOTE_CHARACTER))
         {
             String[] line;
@@ -1490,7 +1491,7 @@ public File getCellBarcodesFromSeurat(File seuratObj)
 
     public File getCellBarcodesFromSeurat(File seuratObj, boolean throwIfNotFound)
     {
-        File barcodes = new File(seuratObj.getParentFile(), seuratObj.getName().replaceAll("seurat.rds$", "cellBarcodes.csv"));
+        File barcodes = FileUtil.appendName(seuratObj.getParentFile(), seuratObj.getName().replaceAll("seurat.rds$", "cellBarcodes.csv"));
         if (throwIfNotFound && !barcodes.exists())
         {
             throw new IllegalArgumentException("Unable to find expected cell barcodes file. This might indicate the seurat object was created with an older version of the pipeline. Expected: " + barcodes.getPath());
@@ -1506,7 +1507,7 @@ public File getMetaTableFromSeurat(File seuratObj)
 
     public File getMetaTableFromSeurat(File seuratObj, boolean throwIfNotFound)
     {
-        File barcodes = new File(seuratObj.getParentFile(), seuratObj.getName().replaceAll("seurat.rds$", "seurat.meta.txt.gz"));
+        File barcodes = FileUtil.appendName(seuratObj.getParentFile(), seuratObj.getName().replaceAll("seurat.rds$", "seurat.meta.txt.gz"));
         if (throwIfNotFound && !barcodes.exists())
         {
             throw new IllegalArgumentException("Unable to find expected metadata file. This might indicate the seurat object was created with an older version of the pipeline. Expected: " + barcodes.getPath());
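Most of the CellHashingServiceImpl changes above swap new File(dir, name) for FileUtil.appendName(dir, name). As I understand it, LabKey's FileUtil.appendName is a stricter helper that rejects child names containing path separators or "..", so a caller cannot silently resolve a file outside the intended directory; plain java.io.File performs no such check. A small standalone illustration of the pitfall being guarded against (the appendName below is a hypothetical re-implementation of that idea, not the LabKey code):

import java.io.File;

public class AppendNameExample
{
    // Hypothetical checked variant: only accept a simple child name, never a relative path.
    static File appendName(File parent, String name)
    {
        if (name.contains("/") || name.contains("\\") || name.contains(".."))
        {
            throw new IllegalArgumentException("Illegal file name: " + name);
        }

        return new File(parent, name);
    }

    public static void main(String[] args)
    {
        File dir = new File("/data/pipeline/output");

        // java.io.File happily resolves outside the parent directory:
        System.out.println(new File(dir, "../../etc/passwd").getPath());

        // The checked variant accepts plain names and fails fast on traversal attempts:
        System.out.println(appendName(dir, "validHashingBarcodes.csv").getPath());
        System.out.println(appendName(dir, "../escape.txt").getPath()); // throws IllegalArgumentException
    }
}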
Expected: " + barcodes.getPath()); @@ -1506,7 +1507,7 @@ public File getMetaTableFromSeurat(File seuratObj) public File getMetaTableFromSeurat(File seuratObj, boolean throwIfNotFound) { - File barcodes = new File(seuratObj.getParentFile(), seuratObj.getName().replaceAll("seurat.rds$", "seurat.meta.txt.gz")); + File barcodes = FileUtil.appendName(seuratObj.getParentFile(), seuratObj.getName().replaceAll("seurat.rds$", "seurat.meta.txt.gz")); if (throwIfNotFound && !barcodes.exists()) { throw new IllegalArgumentException("Unable to find expected metadata file. This might indicate the seurat object was created with an older version of the pipeline. Expected: " + barcodes.getPath()); diff --git a/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java b/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java index e9edcf5b3..7c3d2045f 100644 --- a/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java +++ b/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java @@ -1030,7 +1030,7 @@ else if ("NotUsed".equals(val)) } } - if (saturationIdx >= 0) + if (saturationIdx >= 0 && !"NA".equals(line[saturationIdx]) && !StringUtils.isEmpty(line[saturationIdx])) { double saturation = Double.parseDouble(line[saturationIdx]); totalSaturation += saturation; diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java index 1f6dbf45b..ac631dde2 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java @@ -1,7 +1,9 @@ package org.labkey.singlecell.pipeline.singlecell; +import org.json.JSONObject; import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider; import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.singlecell.pipeline.SeuratToolParameter; import org.labkey.api.singlecell.pipeline.SingleCellStep; import java.util.List; @@ -18,8 +20,26 @@ public static class Provider extends AbstractPipelineStepProvider private File getCachedLoupeFile(Readset rs, boolean throwIfNotFound) throws PipelineJobException { - Map map = getPipelineCtx().getSequenceSupport().getCachedObject(CACHE_KEY, PipelineJob.createObjectMapper().getTypeFactory().constructParametricType(HashMap.class, Long.class, Long.class)); + LongHashMap map = getPipelineCtx().getSequenceSupport().getCachedObject(CACHE_KEY, LongHashMap.class); Long dataId = map.get(rs.getReadsetId()); if (dataId == null) { @@ -199,7 +199,7 @@ public void init(SequenceAnalysisJobSupport support) throws PipelineJobException } // Try to find 10x barcodes: - HashMap readsetToLoupe = new HashMap<>(); + LongHashMap readsetToLoupe = new LongHashMap<>(); for (Readset rs : support.getCachedReadsets()) { ExpData f = findLoupeFile(rs);