From fdc468e950c4564762e07e4bf35bd38303334f92 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 17 Dec 2025 13:40:29 -0800 Subject: [PATCH 01/23] Use LongHashMap --- .../src/org/labkey/singlecell/run/NimbleAlignmentStep.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java b/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java index 07cd1eeea..3244a46d4 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java @@ -3,6 +3,7 @@ import org.apache.commons.io.FileUtils; import org.jetbrains.annotations.Nullable; import org.json.JSONObject; +import org.labkey.api.collections.LongHashMap; import org.labkey.api.data.Container; import org.labkey.api.data.SimpleFilter; import org.labkey.api.data.Sort; @@ -116,7 +117,7 @@ private File createNimbleBam(AlignmentOutputImpl output, Readset rs, List private File getCachedLoupeFile(Readset rs, boolean throwIfNotFound) throws PipelineJobException { - Map map = getPipelineCtx().getSequenceSupport().getCachedObject(CACHE_KEY, PipelineJob.createObjectMapper().getTypeFactory().constructParametricType(HashMap.class, Long.class, Long.class)); + Map map = getPipelineCtx().getSequenceSupport().getCachedObject(CACHE_KEY, PipelineJob.createObjectMapper().getTypeFactory().constructParametricType(Map.class, Long.class, Long.class)); Long dataId = map.get(rs.getReadsetId()); if (dataId == null) { @@ -199,7 +200,7 @@ public void init(SequenceAnalysisJobSupport support) throws PipelineJobException } // Try to find 10x barcodes: - HashMap readsetToLoupe = new HashMap<>(); + LongHashMap readsetToLoupe = new LongHashMap<>(); for (Readset rs : support.getCachedReadsets()) { ExpData f = findLoupeFile(rs); From 4c1afe33ef5d0229a3ef396f214603d72b7ef847 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 17 Dec 2025 14:19:49 -0800 Subject: [PATCH 02/23] Use 
LongHashMap to deserialize --- .../src/org/labkey/singlecell/run/NimbleAlignmentStep.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java b/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java index 3244a46d4..8e0e45dca 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java @@ -31,7 +31,6 @@ import java.io.File; import java.io.IOException; import java.util.Arrays; -import java.util.HashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -117,7 +116,7 @@ private File createNimbleBam(AlignmentOutputImpl output, Readset rs, List private File getCachedLoupeFile(Readset rs, boolean throwIfNotFound) throws PipelineJobException { - Map map = getPipelineCtx().getSequenceSupport().getCachedObject(CACHE_KEY, PipelineJob.createObjectMapper().getTypeFactory().constructParametricType(Map.class, Long.class, Long.class)); + Map map = getPipelineCtx().getSequenceSupport().getCachedObject(CACHE_KEY, PipelineJob.createObjectMapper().getTypeFactory().constructParametricType(LongHashMap.class, Long.class, Long.class)); Long dataId = map.get(rs.getReadsetId()); if (dataId == null) { From 87dffd77f2718595e9029c3d3b9710b348202666 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 17 Dec 2025 15:06:35 -0800 Subject: [PATCH 03/23] Use LongHashMap to serialize, plus testing --- .../pipeline/SequenceJobSupportImpl.java | 12 ++++++++++-- .../labkey/singlecell/run/NimbleAlignmentStep.java | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJobSupportImpl.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJobSupportImpl.java index d7eb2ea8c..ed7e7a126 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJobSupportImpl.java +++ 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJobSupportImpl.java @@ -320,6 +320,11 @@ public void testSerializeWithMap() throws Exception js1._cachedObjects.put("cachedString", "foo"); js1._cachedObjects.put("cachedLong", 2L); + LongHashMap longMap = new LongHashMap<>(); + longMap.put(1L, 2L); + + js1._cachedObjects.put("cachedLongMap", longMap); + ObjectMapper mapper = PipelineJob.createObjectMapper(); StringWriter writer = new StringWriter(); @@ -341,10 +346,13 @@ public void testSerializeWithMap() throws Exception //NOTE: this is not serializing properly. the keys are serialized as Strings Map serializedMap = deserialized.getCachedObject("cachedMap", mapper.getTypeFactory().constructParametricType(Map.class, Integer.class, Integer.class)); assertEquals("Map not serialized properly", 1, serializedMap.size()); - - //TODO: determine if we can coax jackson into serializing these properly assertEquals("Object not serialized with correct key type", Integer.class, serializedMap.keySet().iterator().next().getClass()); assertNotNull("Map keys not serialized properly", serializedMap.get(1)); + + LongHashMap serializedLongMap = (LongHashMap)deserialized.getCachedObject("cachedLongMap", LongHashMap.class); + assertEquals("LongMap not serialized properly", 1, serializedLongMap.size()); + assertEquals("Object not serialized with correct key type", Long.class, serializedLongMap.keySet().iterator().next().getClass()); + assertNotNull("LongMap keys not serialized properly", serializedLongMap.get(1L)); } @Test diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java b/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java index 8e0e45dca..a06484d2d 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java @@ -116,7 +116,7 @@ private File createNimbleBam(AlignmentOutputImpl output, Readset rs, List private File 
getCachedLoupeFile(Readset rs, boolean throwIfNotFound) throws PipelineJobException { - Map map = getPipelineCtx().getSequenceSupport().getCachedObject(CACHE_KEY, PipelineJob.createObjectMapper().getTypeFactory().constructParametricType(LongHashMap.class, Long.class, Long.class)); + LongHashMap map = getPipelineCtx().getSequenceSupport().getCachedObject(CACHE_KEY, LongHashMap.class); Long dataId = map.get(rs.getReadsetId()); if (dataId == null) { From 39737a093fed6dbf4f05e7f9749922ff297a6091 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 17 Dec 2025 19:17:54 -0800 Subject: [PATCH 04/23] Allow for space in HTML --- .../src/org/labkey/singlecell/run/CellRangerGexCountStep.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java index 471b89a7f..b157ea802 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java @@ -693,7 +693,7 @@ public static Chemistry inferChemistry(File cloupeFile) throws PipelineJobExcept throw new IllegalArgumentException("Missing file: " + html.getPath()); } - final Pattern pattern = Pattern.compile("\\[\"Chemistry\",\"(.*?)\"],"); + final Pattern pattern = Pattern.compile("\\[\"Chemistry\",[ ]{0,1}\"(.*?)\"],"); try (BufferedReader reader = Readers.getReader(html)) { String line; From be5a839f8d581e564e7b10ec6bb3c3083833eee3 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 18 Dec 2025 06:10:26 -0800 Subject: [PATCH 05/23] Add Single Cell 5' PE V1 --- .../org/labkey/singlecell/run/CellRangerGexCountStep.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java index b157ea802..bf458ec87 100644 --- 
a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java @@ -625,7 +625,8 @@ public enum Chemistry // See: https://kb.10xgenomics.com/s/article/115004506263-What-is-a-barcode-inclusion-list-formerly-barcode-whitelist // cellranger-x.y.z/lib/python/cellranger/barcodes/ FivePE_V3("Single Cell 5' PE v3", "3M-5pgex-jan-2023.txt.gz"), - FivePE_V2("Single Cell 5' PE v2", "737k-august-2016.txt"); + FivePE_V2("Single Cell 5' PE v2", "737k-august-2016.txt"), + FivePE_V1("Single Cell 5' PE", "737k-april-2014_rc.txt"); final String _label; final String _inclusionListFile; @@ -681,7 +682,7 @@ public static Chemistry getByLabel(String label) } } - throw new IllegalArgumentException("Unknown chemistry: " + label); + throw new IllegalArgumentException("Unknown chemistry: [" + label + "]"); } } From 48bd1ebe6feb48dc5764b5167ffe1313391b0407 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 18 Dec 2025 06:50:07 -0800 Subject: [PATCH 06/23] Improve logging in PerformTcrClustering --- singlecell/resources/chunks/PerformTcrClustering.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/singlecell/resources/chunks/PerformTcrClustering.R b/singlecell/resources/chunks/PerformTcrClustering.R index 5ada2327a..7c1623764 100644 --- a/singlecell/resources/chunks/PerformTcrClustering.R +++ b/singlecell/resources/chunks/PerformTcrClustering.R @@ -2,6 +2,7 @@ for (datasetId in names(seuratObjects)) { printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) + print(paste0('Calculating distances for: ', datasetId)) seuratObj <- tcrClustR::CalculateTcrDistances( inputData = seuratObj, chains = c('TRA', 'TRB', 'TRG', 'TRD'), @@ -10,6 +11,7 @@ for (datasetId in names(seuratObjects)) { calculateChainPairs = TRUE ) + print('Performing TCR Clustering') seuratObj <- tcrClustR::RunTcrClustering( seuratObj_TCR = seuratObj, dianaHeight = 20, @@ -22,7 +24,8 @@ for (datasetId in 
names(seuratObjects)) { } else { for (an in names(seuratObj@misc$TCR_Distances)) { ad <- seuratObj@misc$TCR_Distances[[an]] - print(paste0('Assay: ', an, ', total clones: ', nrow(ad))) + fn <- length(unique(seuratObj[[paste0(an, '_ClusterIdx')]])) + print(paste0('Assay: ', an, ', total clones: ', nrow(ad), '. Distinct families: ', fn)) } } From a9b3e88d1a8de6995430f244bbf96c5d00e9f378 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 18 Dec 2025 06:57:33 -0800 Subject: [PATCH 07/23] Update filename case --- .../src/org/labkey/singlecell/run/CellRangerGexCountStep.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java index bf458ec87..35a2083a5 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java @@ -626,7 +626,7 @@ public enum Chemistry // cellranger-x.y.z/lib/python/cellranger/barcodes/ FivePE_V3("Single Cell 5' PE v3", "3M-5pgex-jan-2023.txt.gz"), FivePE_V2("Single Cell 5' PE v2", "737k-august-2016.txt"), - FivePE_V1("Single Cell 5' PE", "737k-april-2014_rc.txt"); + FivePE_V1("Single Cell 5' PE", "737K-april-2014_rc.txt"); final String _label; final String _inclusionListFile; From e6146f029c2daad9b8f8fccc6b90cd1127ccded7 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 18 Dec 2025 10:10:22 -0800 Subject: [PATCH 08/23] Add VisualizeTcrDistances --- singlecell/resources/chunks/PerformTcrClustering.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/singlecell/resources/chunks/PerformTcrClustering.R b/singlecell/resources/chunks/PerformTcrClustering.R index 7c1623764..15ffdf5aa 100644 --- a/singlecell/resources/chunks/PerformTcrClustering.R +++ b/singlecell/resources/chunks/PerformTcrClustering.R @@ -29,6 +29,8 @@ for (datasetId in names(seuratObjects)) { } } + VisualizeTcrDistances(seuratObj) + 
saveData(seuratObj, datasetId) # Cleanup From 16309ff7acb613412dce1292d30d5aa055da07e3 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 19 Dec 2025 12:58:54 -0800 Subject: [PATCH 09/23] Support inclusion/exclusion lists for ApplyKnownClonotypicData --- .../chunks/ApplyKnownClonotypicData.R | 2 +- .../singlecell/ApplyKnownClonotypicData.java | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/singlecell/resources/chunks/ApplyKnownClonotypicData.R b/singlecell/resources/chunks/ApplyKnownClonotypicData.R index d73e4ef1e..627998ae7 100644 --- a/singlecell/resources/chunks/ApplyKnownClonotypicData.R +++ b/singlecell/resources/chunks/ApplyKnownClonotypicData.R @@ -11,7 +11,7 @@ for (datasetId in names(seuratObjects)) { printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) - seuratObj <- ApplyKnownClonotypicData(seuratObj) + seuratObj <- ApplyKnownClonotypicData(seuratObj, antigenInclusionList = antigenInclusionList, antigenExclusionList = antigenExclusionList) saveData(seuratObj, datasetId) # Cleanup diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java index 1f6dbf45b..dd5fdecf8 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java @@ -1,7 +1,9 @@ package org.labkey.singlecell.pipeline.singlecell; +import org.json.JSONObject; import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider; import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.singlecell.pipeline.SeuratToolParameter; import org.labkey.api.singlecell.pipeline.SingleCellStep; import java.util.List; @@ -18,8 +20,21 @@ public static class Provider extends AbstractPipelineStepProvider Date: Sat, 20 Dec 2025 12:13:43 -0800 Subject: 
[PATCH 10/23] Support additional methods for escape --- singlecell/resources/chunks/RunEscape.R | 2 +- .../labkey/singlecell/pipeline/singlecell/RunEscape.java | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/singlecell/resources/chunks/RunEscape.R b/singlecell/resources/chunks/RunEscape.R index 960bbe631..91421989c 100644 --- a/singlecell/resources/chunks/RunEscape.R +++ b/singlecell/resources/chunks/RunEscape.R @@ -8,7 +8,7 @@ for (datasetId in names(seuratObjects)) { printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) - seuratObj <- CellMembrane::RunEscape(seuratObj, outputAssayBaseName = outputAssayBaseName, doPlot = TRUE, performDimRedux = performDimRedux, nCores = nCores) + seuratObj <- CellMembrane::RunEscape(seuratObj, outputAssayBaseName = outputAssayBaseName, doPlot = TRUE, performDimRedux = performDimRedux, escapeMethod = escapeMethod, nCores = nCores) saveData(seuratObj, datasetId) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunEscape.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunEscape.java index 5de04773d..be3cd79e6 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunEscape.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunEscape.java @@ -23,6 +23,15 @@ public Provider() SeuratToolParameter.create("outputAssayBaseName", "Output Assay Basename", "The name of the assay to store results", "textfield", new JSONObject(){{ put("allowBank", false); }}, "escape."), + SeuratToolParameter.create("escapeMethod", "Escape Method", "Passed directly to escape::runEscape()", "ldk-simplecombo", new JSONObject() + {{ + put("multiSelect", false); + put("allowBlank", false); + put("storeValues", "ssGSEA;GSVA;UCell;AUCell"); + put("initialValues", "ssGSEA"); + put("delimiter", ";"); + put("joinReturnValue", true); + }}, null), SeuratToolParameter.create("performDimRedux", "Perform DimRedux", "If true, the standard seurat 
PCA/FindClusters/UMAP process will be run on the escape data. This may be most useful when using a customGeneSet or a smaller set of features/pathways", "checkbox", new JSONObject(){{ }}, false, null, true) From 2893060e658a74ed71d135b5b3859a3d87c5d0de Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 20 Dec 2025 12:23:57 -0800 Subject: [PATCH 11/23] Minor behavior change to SEURAT_MAX_THREADS --- .../AbstractSingleCellPipelineStep.java | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java index b2adf25a0..45d161346 100644 --- a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java +++ b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java @@ -318,14 +318,19 @@ public static void executeR(SequenceOutputHandler.JobContext ctx, String dockerC throw new PipelineJobException(e); } - if (seuratThreads != null) + Integer maxThreads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); + if (seuratThreads == null && maxThreads != null) { - Integer maxThreads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); - if (maxThreads != null && maxThreads < seuratThreads) - { - seuratThreads = maxThreads; - } + seuratThreads = maxThreads; + } + else if (seuratThreads != null && maxThreads != null && maxThreads < seuratThreads) + { + ctx.getLogger().debug("Lowering SEURAT_MAX_THREADS based on the job settings, to: " + maxThreads); + seuratThreads = maxThreads; + } + if (seuratThreads != null) + { wrapper.addToDockerEnvironment("SEURAT_MAX_THREADS", seuratThreads.toString()); } From fccb1165063161b156db4715dc16aa11690f2e88 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 20 Dec 2025 15:24:47 -0800 Subject: [PATCH 12/23] Also support heatmapGroupingVars for escape --- 
singlecell/resources/chunks/RunEscape.R | 2 +- .../singlecell/pipeline/singlecell/RunEscape.java | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/singlecell/resources/chunks/RunEscape.R b/singlecell/resources/chunks/RunEscape.R index 91421989c..dbf23f433 100644 --- a/singlecell/resources/chunks/RunEscape.R +++ b/singlecell/resources/chunks/RunEscape.R @@ -8,7 +8,7 @@ for (datasetId in names(seuratObjects)) { printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) - seuratObj <- CellMembrane::RunEscape(seuratObj, outputAssayBaseName = outputAssayBaseName, doPlot = TRUE, performDimRedux = performDimRedux, escapeMethod = escapeMethod, nCores = nCores) + seuratObj <- CellMembrane::RunEscape(seuratObj, outputAssayBaseName = outputAssayBaseName, doPlot = TRUE, heatmapGroupingVars = heatmapGroupingVars, performDimRedux = performDimRedux, escapeMethod = escapeMethod, nCores = nCores) saveData(seuratObj, datasetId) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunEscape.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunEscape.java index be3cd79e6..720954635 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunEscape.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunEscape.java @@ -7,6 +7,7 @@ import org.labkey.api.singlecell.pipeline.SingleCellStep; import java.util.Arrays; +import java.util.List; public class RunEscape extends AbstractCellMembraneStep { @@ -34,8 +35,15 @@ public Provider() }}, null), SeuratToolParameter.create("performDimRedux", "Perform DimRedux", "If true, the standard seurat PCA/FindClusters/UMAP process will be run on the escape data. 
This may be most useful when using a customGeneSet or a smaller set of features/pathways", "checkbox", new JSONObject(){{ - }}, false, null, true) - ), null, null); + }}, false, null, true), + SeuratToolParameter.create("heatmapGroupingVars", "Heatmap Grouping Vars", "Enter one field name per line, which will be used to generate a heatmap of results", "sequenceanalysis-trimmingtextarea", new JSONObject() + {{ + put("allowBlank", true); + put("height", 150); + put("delimiter", ","); + put("stripCharsRe", "/['\"]/g"); + }}, "ClusterNames_0.2", null, true, true).delimiter(",") + ), List.of("/sequenceanalysis/field/TrimmingTextArea.js"), null); } @Override From 265ad094591553708c5727021a3cabfe25cc6ade Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 22 Dec 2025 06:58:48 -0800 Subject: [PATCH 13/23] Drop clustalw from main tools script --- .../pipeline_code/extra_tools_install.sh | 29 +++++++++++++++++ .../pipeline_code/sequence_tools_install.sh | 31 ------------------- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index e85469e5b..2da4a5037 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -379,3 +379,32 @@ then else echo "Already installed" fi + +# +#clustalw +# + +echo "" +echo "" +echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" +echo "Installing ClustalW" +echo "" +cd $LKSRC_DIR + +if [[ ! -e ${LKTOOLS_DIR}/clustalw2 || ! 
-z $FORCE_REINSTALL ]]; +then + rm -Rf clustal* + rm -Rf 1.2.4-cmake.tar.gz + rm -Rf $LKTOOLS_DIR/clustalw2 + + wget $WGET_OPTS https://github.com/GSLBiotech/clustal-omega/archive/refs/tags/1.2.4-cmake.tar.gz + tar -xf 1.2.4-cmake.tar.gz + cd clustal-omega-1.2.4-cmake + ./configure + make + + install ./src/clustalw2 $LKTOOLS_DIR/clustalw2 + +else + echo "Already installed" +fi diff --git a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh index 3e31985f4..9f6442e56 100755 --- a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh @@ -999,37 +999,6 @@ else fi -# -#clustalw -# - -echo "" -echo "" -echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" -echo "Installing ClustalW" -echo "" -cd $LKSRC_DIR - -if [[ ! -e ${LKTOOLS_DIR}/clustalw2 || ! -z $FORCE_REINSTALL ]]; -then - rm -Rf clustalw-* - rm -Rf $LKTOOLS_DIR/clustalw2 - - wget $WGET_OPTS http://www.clustal.org/download/current/clustalw-2.1.tar.gz - gunzip clustalw-2.1.tar.gz - tar -xf clustalw-2.1.tar - gzip clustalw-2.1.tar - cd clustalw-2.1 - ./configure - make - - install ./src/clustalw2 $LKTOOLS_DIR/clustalw2 - -else - echo "Already installed" -fi - - # #muscle # From 70058a9e44a5383a6b250752a840bfe1cb97c190 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 22 Dec 2025 13:49:20 -0800 Subject: [PATCH 14/23] Bugfix to serialization of Map --- .../singlecell/CellHashingServiceImpl.java | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index f40b0940b..35266ee98 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -48,6 +48,7 @@ import org.labkey.api.singlecell.model.Sample; import 
org.labkey.api.singlecell.model.Sort; import org.labkey.api.singlecell.pipeline.SeuratToolParameter; +import org.labkey.api.util.FileUtil; import org.labkey.api.util.PageFlowUtil; import org.labkey.api.writer.PrintWriters; import org.labkey.singlecell.run.CellRangerFeatureBarcodeHandler; @@ -518,12 +519,12 @@ else if (genomeIds.size() > 1) public File getValidCiteSeqBarcodeFile(File sourceDir, long gexReadsetId) { - return new File(sourceDir, "validADTS." + gexReadsetId + ".csv"); + return FileUtil.appendName(sourceDir, "validADTS." + gexReadsetId + ".csv"); } public File getValidCiteSeqBarcodeMetadataFile(File sourceDir, long gexReadsetId) { - return new File(sourceDir, "validADTS." + gexReadsetId + ".metadata.txt"); + return FileUtil.appendName(sourceDir, "validADTS." + gexReadsetId + ".metadata.txt"); } private void writeCiteSeqBarcodes(PipelineJob job, Map> gexToPanels, File outputDir) throws PipelineJobException @@ -585,7 +586,7 @@ private void writeCiteSeqBarcodes(PipelineJob job, Map> gexToP public File getValidHashingBarcodeFile(File sourceDir) { - return new File(sourceDir, "validHashingBarcodes.csv"); + return FileUtil.appendName(sourceDir, "validHashingBarcodes.csv"); } @Override @@ -725,7 +726,7 @@ public File generateHashingCallsForRawMatrix(Readset parentReadset, PipelineOutp private Map getCachedCiteSeqReadsetMap(SequenceAnalysisJobSupport support) throws PipelineJobException { - return support.getCachedObject(READSET_TO_CITESEQ_MAP, PipelineJob.createObjectMapper().getTypeFactory().constructParametricType(Map.class, Integer.class, Integer.class)); + return support.getCachedObject(READSET_TO_CITESEQ_MAP, LongHashMap.class); } @Override @@ -784,7 +785,7 @@ public File getH5FileForGexReadset(SequenceAnalysisJobSupport support, long read throw new PipelineJobException("Unable to find loupe file: " + loupe.getPath()); } - File h5 = new File(loupe.getParentFile(), "raw_feature_bc_matrix.h5"); + File h5 = FileUtil.appendName(loupe.getParentFile(), 
"raw_feature_bc_matrix.h5"); if (!h5.exists()) { throw new PipelineJobException("Unable to find h5 file: " + h5.getPath()); @@ -796,12 +797,12 @@ public File getH5FileForGexReadset(SequenceAnalysisJobSupport support, long read @Override public File getCDNAInfoFile(File sourceDir) { - return new File(sourceDir, "cDNAInfo.txt"); + return FileUtil.appendName(sourceDir, "cDNAInfo.txt"); } public Map getCachedHashingReadsetMap(SequenceAnalysisJobSupport support) throws PipelineJobException { - return support.getCachedObject(READSET_TO_HASHING_MAP, PipelineJob.createObjectMapper().getTypeFactory().constructParametricType(Map.class, Integer.class, Integer.class)); + return support.getCachedObject(READSET_TO_HASHING_MAP, LongHashMap.class); } public File getCachedReadsetToCountMatrix(SequenceAnalysisJobSupport support, long readsetId, CellHashingService.BARCODE_TYPE type) throws PipelineJobException @@ -1033,7 +1034,7 @@ public List getHashingCallingParams(boolean allowMethod public File getAllHashingBarcodesFile(File webserverDir) { - return new File(webserverDir, BARCODE_TYPE.hashing.getAllBarcodeFileName()); + return FileUtil.appendName(webserverDir, BARCODE_TYPE.hashing.getAllBarcodeFileName()); } private void writeAllHashingBarcodes(Collection groupNames, User u, Container c, File webserverDir) throws PipelineJobException @@ -1154,12 +1155,12 @@ else if ("Negative".equals(line[htoClassIdx])) private File getExpectedCallsFile(File outputDir, String basename) { - return new File(outputDir, basename + CALL_EXTENSION); + return FileUtil.appendName(outputDir, basename + CALL_EXTENSION); } private File getMolInfoFileFromCounts(File citeSeqCountOutDir) { - return new File(citeSeqCountOutDir.getParentFile(), "molecule_info.h5"); + return FileUtil.appendName(citeSeqCountOutDir.getParentFile(), "molecule_info.h5"); } public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, String basename, Logger log, File localPipelineDir, 
CellHashingService.CellHashingParameters parameters, PipelineContext ctx) throws PipelineJobException @@ -1195,11 +1196,11 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St File cellBarcodeWhitelistFile = parameters.cellBarcodeWhitelistFile; inputFiles.add(cellBarcodeWhitelistFile); - File htmlFile = new File(outputDir, basename + ".html"); - File localHtml = new File(localPipelineDir, htmlFile.getName()); + File htmlFile = FileUtil.appendName(outputDir, basename + ".html"); + File localHtml = FileUtil.appendName(localPipelineDir, htmlFile.getName()); - File countFile = new File(outputDir, basename + ".rawCounts.rds"); - File localCounts = new File(localPipelineDir, countFile.getName()); + File countFile = FileUtil.appendName(outputDir, basename + ".rawCounts.rds"); + File localCounts = FileUtil.appendName(localPipelineDir, countFile.getName()); // Note: if this job fails and then is resumed, having that pre-existing copy of the HTML can pose a problem if (localHtml.exists()) @@ -1219,7 +1220,7 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St metricsFile.delete(); } - File localRScript = new File(outputDir, "generateCallsWrapper.R"); + File localRScript = FileUtil.appendName(outputDir, "generateCallsWrapper.R"); try (PrintWriter writer = PrintWriters.getPrintWriter(localRScript)) { String cellbarcodeWhitelist = cellBarcodeWhitelistFile != null ? 
"'" + cellBarcodeWhitelistFile.getPath() + "'" : "NULL"; @@ -1394,7 +1395,7 @@ public void copyHtmlLocally(SequenceOutputHandler.JobContext ctx) throws Pipelin if (f.getName().endsWith(".hashing.html")) { ctx.getLogger().info("Copying hashing HTML locally for debugging: " + f.getName()); - File target = new File(ctx.getSourceDirectory(), f.getName()); + File target = FileUtil.appendName(ctx.getSourceDirectory(), f.getName()); if (target.exists()) { target.delete(); @@ -1461,7 +1462,7 @@ public Set getHtosForReadset(Long hashingReadsetId, File webserverJobDir public File subsetBarcodes(File allCellBarcodes, @Nullable String barcodePrefix) throws PipelineJobException { //Subset barcodes by dataset: - File output = new File(allCellBarcodes.getParentFile(), "cellBarcodeWhitelist." + (barcodePrefix == null ? "all" : barcodePrefix ) + ".txt"); + File output = FileUtil.appendName(allCellBarcodes.getParentFile(), "cellBarcodeWhitelist." + (barcodePrefix == null ? "all" : barcodePrefix ) + ".txt"); try (CSVReader reader = new CSVReader(Readers.getReader(allCellBarcodes), '\t'); CSVWriter writer = new CSVWriter(PrintWriters.getPrintWriter(output), '\t', CSVWriter.NO_QUOTE_CHARACTER)) { String[] line; @@ -1490,7 +1491,7 @@ public File getCellBarcodesFromSeurat(File seuratObj) public File getCellBarcodesFromSeurat(File seuratObj, boolean throwIfNotFound) { - File barcodes = new File(seuratObj.getParentFile(), seuratObj.getName().replaceAll("seurat.rds$", "cellBarcodes.csv")); + File barcodes = FileUtil.appendName(seuratObj.getParentFile(), seuratObj.getName().replaceAll("seurat.rds$", "cellBarcodes.csv")); if (throwIfNotFound && !barcodes.exists()) { throw new IllegalArgumentException("Unable to find expected cell barcodes file. This might indicate the seurat object was created with an older version of the pipeline. 
Expected: " + barcodes.getPath()); @@ -1506,7 +1507,7 @@ public File getMetaTableFromSeurat(File seuratObj) public File getMetaTableFromSeurat(File seuratObj, boolean throwIfNotFound) { - File barcodes = new File(seuratObj.getParentFile(), seuratObj.getName().replaceAll("seurat.rds$", "seurat.meta.txt.gz")); + File barcodes = FileUtil.appendName(seuratObj.getParentFile(), seuratObj.getName().replaceAll("seurat.rds$", "seurat.meta.txt.gz")); if (throwIfNotFound && !barcodes.exists()) { throw new IllegalArgumentException("Unable to find expected metadata file. This might indicate the seurat object was created with an older version of the pipeline. Expected: " + barcodes.getPath()); From 431345be78bf4757af84c0613d619d050dfb3f78 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 22 Dec 2025 19:06:44 -0800 Subject: [PATCH 15/23] Another bugfix to serialization of Map --- .../singlecell/CellHashingServiceImpl.java | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index 35266ee98..6cb814923 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -598,7 +598,7 @@ public File generateHashingCallsForRawMatrix(Readset parentReadset, PipelineOutp } parameters.validate(true); - Map readsetToHashing = getCachedHashingReadsetMap(ctx.getSequenceSupport()); + Map readsetToHashing = getCachedHashingReadsetMap(ctx.getSequenceSupport()); if (readsetToHashing.isEmpty()) { ctx.getLogger().info("No cached " + parameters.type.name() + " readsets, skipping"); @@ -622,7 +622,7 @@ public File generateHashingCallsForRawMatrix(Readset parentReadset, PipelineOutp ctx.getLogger().debug("total cached readset/" + parameters.type.name() + " readset pairs: " + readsetToHashing.size()); ctx.getLogger().debug("unique indexes: " + lineCount); 
- Readset htoReadset = ctx.getSequenceSupport().getCachedReadset(readsetToHashing.get(parentReadset.getReadsetId())); + Readset htoReadset = ctx.getSequenceSupport().getCachedReadset((long)readsetToHashing.get(parentReadset.getReadsetId())); if (htoReadset == null) { throw new PipelineJobException("Unable to find HTO readset for readset: " + parentReadset.getRowId()); @@ -724,7 +724,7 @@ public File generateHashingCallsForRawMatrix(Readset parentReadset, PipelineOutp return callsFile; } - private Map getCachedCiteSeqReadsetMap(SequenceAnalysisJobSupport support) throws PipelineJobException + private Map getCachedCiteSeqReadsetMap(SequenceAnalysisJobSupport support) throws PipelineJobException { return support.getCachedObject(READSET_TO_CITESEQ_MAP, LongHashMap.class); } @@ -732,7 +732,7 @@ private Map getCachedCiteSeqReadsetMap(SequenceAnalysisJobSupport su @Override public boolean usesCellHashing(SequenceAnalysisJobSupport support, File sourceDir) throws PipelineJobException { - Map gexToHashingMap = getCachedHashingReadsetMap(support); + Map gexToHashingMap = getCachedHashingReadsetMap(support); if (gexToHashingMap == null || gexToHashingMap.isEmpty()) return false; @@ -748,7 +748,7 @@ public boolean usesCellHashing(SequenceAnalysisJobSupport support, File sourceDi @Override public boolean usesCiteSeq(SequenceAnalysisJobSupport support, List inputFiles) throws PipelineJobException { - Map gexToCiteMap = getCachedCiteSeqReadsetMap(support); + Map gexToCiteMap = getCachedCiteSeqReadsetMap(support); if (gexToCiteMap == null || gexToCiteMap.isEmpty()) return false; @@ -800,7 +800,7 @@ public File getCDNAInfoFile(File sourceDir) return FileUtil.appendName(sourceDir, "cDNAInfo.txt"); } - public Map getCachedHashingReadsetMap(SequenceAnalysisJobSupport support) throws PipelineJobException + public Map getCachedHashingReadsetMap(SequenceAnalysisJobSupport support) throws PipelineJobException { return support.getCachedObject(READSET_TO_HASHING_MAP, LongHashMap.class); } 
@@ -1370,13 +1370,13 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St @Override public File getExistingFeatureBarcodeCountDir(Readset parentReadset, BARCODE_TYPE type, SequenceAnalysisJobSupport support) throws PipelineJobException { - Long childId = type == BARCODE_TYPE.hashing ? getCachedHashingReadsetMap(support).get(parentReadset.getReadsetId()) : getCachedCiteSeqReadsetMap(support).get(parentReadset.getReadsetId()); + Integer childId = type == BARCODE_TYPE.hashing ? getCachedHashingReadsetMap(support).get(parentReadset.getReadsetId()) : getCachedCiteSeqReadsetMap(support).get(parentReadset.getReadsetId()); if (childId == null) { throw new PipelineJobException("Unable to find cached readset of type " + type.name() + " for parent: " + parentReadset.getReadsetId()); } - File ret = getCachedReadsetToCountMatrix(support, childId, type); + File ret = getCachedReadsetToCountMatrix(support, (long)childId, type); if (ret == null) { throw new PipelineJobException("Unable to find cached count matrix of type " + type.name() + " for parent: " + parentReadset.getReadsetId()); @@ -1421,7 +1421,7 @@ public void copyHtmlLocally(SequenceOutputHandler.JobContext ctx) throws Pipelin @Override public Set getHtosForParentReadset(Long parentReadsetId, File webserverJobDir, SequenceAnalysisJobSupport support, boolean throwIfNotFound) throws PipelineJobException { - Long htoReadset = getCachedHashingReadsetMap(support).get(parentReadsetId); + Integer htoReadset = getCachedHashingReadsetMap(support).get(parentReadsetId); if (htoReadset == null) { if (throwIfNotFound) @@ -1434,7 +1434,7 @@ public Set getHtosForParentReadset(Long parentReadsetId, File webserverJ } } - return getHtosForReadset(htoReadset, webserverJobDir); + return getHtosForReadset((long)htoReadset, webserverJobDir); } public Set getHtosForReadset(Long hashingReadsetId, File webserverJobDir) throws PipelineJobException From 2950346dec865c3c489d69716ce059e49f08d33a Mon Sep 17 00:00:00 
2001 From: bbimber Date: Thu, 25 Dec 2025 08:27:49 -0800 Subject: [PATCH 16/23] Better resume behavior for escape --- singlecell/resources/chunks/RunEscape.R | 41 +++++++++++++++---- .../analysis/AbstractSingleCellHandler.java | 2 +- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/singlecell/resources/chunks/RunEscape.R b/singlecell/resources/chunks/RunEscape.R index dbf23f433..e9aa951fb 100644 --- a/singlecell/resources/chunks/RunEscape.R +++ b/singlecell/resources/chunks/RunEscape.R @@ -5,14 +5,41 @@ if (Sys.getenv('SEURAT_MAX_THREADS') != '') { } for (datasetId in names(seuratObjects)) { - printName(datasetId) - seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) + printName(datasetId) + seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) - seuratObj <- CellMembrane::RunEscape(seuratObj, outputAssayBaseName = outputAssayBaseName, doPlot = TRUE, heatmapGroupingVars = heatmapGroupingVars, performDimRedux = performDimRedux, escapeMethod = escapeMethod, nCores = nCores) + toDelete <- c() - saveData(seuratObj, datasetId) + vals <- eval(formals(CellMembrane::RunEscape)$msigdbGeneSets) + for (idx in seq_along(vals)) { + geneSetName <- names(vals)[idx] + geneSet <- vals[[idx]] + logger::log_info(paste0('Processing: ', geneSetName, ' / ', geneSet)) - # Cleanup - rm(seuratObj) - gc() + fn <- paste0('escape.', datasetId, '.', ifelse(geneSetName == '', yes = geneSet, no = geneSetName), '.rds') + if (file.exists(fn)) { + logger::log_info(paste0('resuming: ', fn)) + seuratObj <- readRDS(fn) + toDelete <- c(toDelete, fn) + } else { + msigdbGeneSets <- geneSet + if (geneSetName != '') { + names(msigdbGeneSets) <- geneSetName + } + + seuratObj <- CellMembrane::RunEscape(seuratObj, msigdbGeneSets = msigdbGeneSets, outputAssayBaseName = outputAssayBaseName, doPlot = TRUE, heatmapGroupingVars = heatmapGroupingVars, performDimRedux = performDimRedux, escapeMethod = escapeMethod, nCores = nCores) + saveRDS(seuratObj, file = fn) + toDelete <- c(toDelete, fn) + } + 
} + + for(fn in toDelete) { + unlink(fn) + } + + saveData(seuratObj, datasetId) + + # Cleanup + rm(seuratObj) + gc() } \ No newline at end of file diff --git a/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java b/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java index e9edcf5b3..7c3d2045f 100644 --- a/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java +++ b/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java @@ -1030,7 +1030,7 @@ else if ("NotUsed".equals(val)) } } - if (saturationIdx >= 0) + if (saturationIdx >= 0 && !"NA".equals(line[saturationIdx]) && !StringUtils.isEmpty(line[saturationIdx])) { double saturation = Double.parseDouble(line[saturationIdx]); totalSaturation += saturation; From 1a1bd9477a26a0842394bb0953e775b42b48b585 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 25 Dec 2025 08:32:06 -0800 Subject: [PATCH 17/23] Allow tcrClustR to specify a subset of chains --- singlecell/resources/chunks/PerformTcrClustering.R | 2 +- .../pipeline/singlecell/PerformTcrClustering.java | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/singlecell/resources/chunks/PerformTcrClustering.R b/singlecell/resources/chunks/PerformTcrClustering.R index 15ffdf5aa..704273f11 100644 --- a/singlecell/resources/chunks/PerformTcrClustering.R +++ b/singlecell/resources/chunks/PerformTcrClustering.R @@ -5,7 +5,7 @@ for (datasetId in names(seuratObjects)) { print(paste0('Calculating distances for: ', datasetId)) seuratObj <- tcrClustR::CalculateTcrDistances( inputData = seuratObj, - chains = c('TRA', 'TRB', 'TRG', 'TRD'), + chains = chains, organism = organism, minimumCloneSize = 2, calculateChainPairs = TRUE diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PerformTcrClustering.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PerformTcrClustering.java index 919b64cf0..c4e9b261f 100644 --- 
a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PerformTcrClustering.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PerformTcrClustering.java @@ -28,7 +28,16 @@ public Provider() put("initialValues", "human"); put("delimiter", ";"); put("joinReturnValue", true); - }}, null) + }}, null), + SeuratToolParameter.create("chains", "Chains", "The chains to process", "ldk-simplecombo", new JSONObject() + {{ + put("multiSelect", true); + put("allowBlank", false); + put("storeValues", "TRA;TRB;TRG;TRD"); + put("initialValues", "TRA;TRB;TRG;TRD"); + put("delimiter", ";"); + put("joinReturnValue", true); + }}, "TRA;TRB;TRG;TRD", null, true, true) ), null, "https://github.com/bimberlabinternal/tcrClustR/"); } From 18182645c9ff25a4bed13e3747b054f0f9f238c7 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 25 Dec 2025 17:46:41 -0800 Subject: [PATCH 18/23] More reporting for RunDecoupler --- singlecell/resources/chunks/RunDecoupler.R | 5 +++++ .../singlecell/pipeline/singlecell/RunDecoupler.java | 10 +++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/singlecell/resources/chunks/RunDecoupler.R b/singlecell/resources/chunks/RunDecoupler.R index 6e4656164..b052d69b0 100644 --- a/singlecell/resources/chunks/RunDecoupler.R +++ b/singlecell/resources/chunks/RunDecoupler.R @@ -3,6 +3,11 @@ for (datasetId in names(seuratObjects)) { seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) seuratObj <- CellMembrane::RunDecoupleR(seuratObj) + if (!all(is.na(heatmapGroupingVars))) { + for (heatmapGroupingVar in heatmapGroupingVars) { + PlotTfData(seuratObj, groupField = heatmapGroupingVar) + } + } saveData(seuratObj, datasetId) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunDecoupler.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunDecoupler.java index 211d8d935..5235b8f31 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunDecoupler.java +++ 
b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunDecoupler.java @@ -1,7 +1,9 @@ package org.labkey.singlecell.pipeline.singlecell; +import org.json.JSONObject; import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider; import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.singlecell.pipeline.SeuratToolParameter; import org.labkey.api.singlecell.pipeline.SingleCellStep; import java.util.Arrays; @@ -18,7 +20,13 @@ public static class Provider extends AbstractPipelineStepProvider Date: Sat, 27 Dec 2025 16:54:39 -0800 Subject: [PATCH 19/23] Improve cleanup of old columns --- singlecell/resources/chunks/PredictTcellActivation.R | 8 ++++++++ singlecell/resources/chunks/UpdateSeuratPrototype.R | 11 ++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/singlecell/resources/chunks/PredictTcellActivation.R b/singlecell/resources/chunks/PredictTcellActivation.R index 7b18f1922..c5374cc76 100644 --- a/singlecell/resources/chunks/PredictTcellActivation.R +++ b/singlecell/resources/chunks/PredictTcellActivation.R @@ -2,6 +2,14 @@ for (datasetId in names(seuratObjects)) { printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) + toDrop <- grep(names(seuratObj@meta.data), pattern = "sPLS", value = TRUE) + if (length(toDrop) > 0) { + print(paste0('Dropping pre-existing columns: ', paste0(toDrop, collapse = ', '))) + for (colName in toDrop) { + seuratObj[[colName]] <- NULL + } + } + seuratObj <- RIRA::PredictTcellActivation(seuratObj) saveData(seuratObj, datasetId) diff --git a/singlecell/resources/chunks/UpdateSeuratPrototype.R b/singlecell/resources/chunks/UpdateSeuratPrototype.R index f5b41a24b..4d7f9e13e 100644 --- a/singlecell/resources/chunks/UpdateSeuratPrototype.R +++ b/singlecell/resources/chunks/UpdateSeuratPrototype.R @@ -47,11 +47,20 @@ for (datasetId in names(seuratObjects)) { } if (scoreActivation) { + # Drop existing columns: + toDrop <- 
grep(names(seuratObj@meta.data), pattern = "sPLS", value = TRUE) + if (length(toDrop) > 0) { + print(paste0('Dropping pre-existing columns: ', paste0(toDrop, collapse = ', '))) + for (colName in toDrop) { + seuratObj[[colName]] <- NULL + } + } + seuratObj <- RIRA::PredictTcellActivation(seuratObj) } if (recalculateUCells) { - seuratObj <- RIRA::CalculateUCellScores(seuratObj, storeRanks = FALSE, assayName = 'RNA', forceRecalculate = TRUE, ncores = nCores) + seuratObj <- RIRA::CalculateUCellScores(seuratObj, storeRanks = FALSE, assayName = 'RNA', forceRecalculate = TRUE, ncores = nCores, dropAllExistingUcells = TRUE) } saveData(seuratObj, datasetId) From 1e6256d05f0ec59a390ec169580169b87ef8a1db Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 28 Dec 2025 14:48:41 -0800 Subject: [PATCH 20/23] Change field defaults --- .../pipeline/singlecell/ApplyKnownClonotypicData.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java index dd5fdecf8..3c8f124d5 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java @@ -22,14 +22,12 @@ public Provider() super("ApplyKnownClonotypicData", "Append Known Clonotype/Antigen Data", "RDiscvr", "This will query the clone_responses table and append a column tagging each cell for matching antigens (based on clonotype)", List.of( SeuratToolParameter.create("antigenInclusionList", "Antigen(s) to Include", "Enter antigens, per line. 
Only stims using these antigens will be used", "sequenceanalysis-trimmingtextarea", new JSONObject() {{ - put("allowBlank", false); put("height", 150); put("delimiter", ","); put("stripCharsRe", "/['\"]/g"); }}, null, null, true, true).delimiter(","), SeuratToolParameter.create("antigenExclusionList", "Antigen(s) to Exclude", "Enter antigens, per line. Stims using these antigens will be excluded", "sequenceanalysis-trimmingtextarea", new JSONObject() {{ - put("allowBlank", false); put("height", 150); put("delimiter", ","); put("stripCharsRe", "/['\"]/g"); From 65edb7e93253a2503e06167a3ea016226507b59e Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 30 Dec 2025 13:10:04 -0800 Subject: [PATCH 21/23] Bugfix to ArchiveReadsetsAction --- .../sequenceanalysis/SequenceAnalysisController.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java index 68f786120..735ad9ea9 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java @@ -5072,6 +5072,7 @@ public ApiResponse execute(ArchiveReadsetsForm form, BindException errors) throw Set toDelete = new HashSet<>(); List> toUpdate = new ArrayList<>(); + Set encounteredReaddata = new HashSet<>(); for (ReadData rd : rs.getReadData()) { if (rd.getSra_accession() == null) @@ -5080,7 +5081,14 @@ public ApiResponse execute(ArchiveReadsetsForm form, BindException errors) throw return null; } + // A given ReadData can be referenced by multiple readsets + if (encounteredReaddata.contains(rd.getRowid())) + { + continue; + } + toUpdate.add(new CaseInsensitiveHashMap<>(Map.of("rowid", rd.getRowid(), "archived", true, "container", rd.getContainer()))); + encounteredReaddata.add(rd.getRowid()); // File 1: ExpData d1 = 
ExperimentService.get().getExpData(rd.getFileId1()); @@ -5134,7 +5142,6 @@ public ApiResponse execute(ArchiveReadsetsForm form, BindException errors) throw { List> keys = new ArrayList<>(); toUpdate.forEach(row -> { - keys.add(new CaseInsensitiveHashMap<>(Map.of("rowid", row.get("rowid")))); }); @@ -5144,7 +5151,7 @@ public ApiResponse execute(ArchiveReadsetsForm form, BindException errors) throw } catch (Exception e) { - _log.error(e); + _log.error("Error archiving readsets", e); errors.reject(ERROR_MSG, "Error archiving readset: " + readsetId + ", " + e.getMessage()); return null; } From efcd54eceba6a704741752913fedd10ab63230bc Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 31 Dec 2025 07:43:00 -0800 Subject: [PATCH 22/23] Allow whitespace in field --- .../pipeline/singlecell/ApplyKnownClonotypicData.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java index 3c8f124d5..18296c0d6 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java @@ -25,12 +25,14 @@ public Provider() put("height", 150); put("delimiter", ","); put("stripCharsRe", "/['\"]/g"); + put("replaceAllWhitespace", false); }}, null, null, true, true).delimiter(","), SeuratToolParameter.create("antigenExclusionList", "Antigen(s) to Exclude", "Enter antigens, per line. 
Stims using these antigens will be excluded", "sequenceanalysis-trimmingtextarea", new JSONObject() {{ put("height", 150); put("delimiter", ","); put("stripCharsRe", "/['\"]/g"); + put("replaceAllWhitespace", false); }}, null, null, true, true).delimiter(",") ), List.of("/sequenceanalysis/field/TrimmingTextArea.js"), null); } From 552d9ce0c4843ef9829d67c3eb16cab7c722e15e Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 31 Dec 2025 08:43:38 -0800 Subject: [PATCH 23/23] Support more clonotype filters --- singlecell/resources/chunks/ApplyKnownClonotypicData.R | 2 +- .../pipeline/singlecell/ApplyKnownClonotypicData.java | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/singlecell/resources/chunks/ApplyKnownClonotypicData.R b/singlecell/resources/chunks/ApplyKnownClonotypicData.R index 627998ae7..4967ef43a 100644 --- a/singlecell/resources/chunks/ApplyKnownClonotypicData.R +++ b/singlecell/resources/chunks/ApplyKnownClonotypicData.R @@ -11,7 +11,7 @@ for (datasetId in names(seuratObjects)) { printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) - seuratObj <- ApplyKnownClonotypicData(seuratObj, antigenInclusionList = antigenInclusionList, antigenExclusionList = antigenExclusionList) + seuratObj <- ApplyKnownClonotypicData(seuratObj, antigenInclusionList = antigenInclusionList, antigenExclusionList = antigenExclusionList, minActivationFrequency = minActivationFrequency) saveData(seuratObj, datasetId) # Cleanup diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java index 18296c0d6..ac631dde2 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/ApplyKnownClonotypicData.java @@ -33,7 +33,12 @@ public Provider() put("delimiter", ","); put("stripCharsRe", "/['\"]/g"); 
put("replaceAllWhitespace", false); - }}, null, null, true, true).delimiter(",") + }}, null, null, true, true).delimiter(","), + SeuratToolParameter.create("minActivationFrequency", "Minimum Activation Frequency", "If provided, only responses with an activation frequency (of the parent population) at or above this value will be included", "ldk-numberfield", new JSONObject(){{ + put("minValue", 0); + put("maxValue", 1.0); + put("decimalPrecision", 4); + }}, 0) ), List.of("/sequenceanalysis/field/TrimmingTextArea.js"), null); }