Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
fdc468e
Use LongHashMap
bbimber Dec 17, 2025
4c1afe3
Use LongHashMap to deserialize
bbimber Dec 17, 2025
87dffd7
Use LongHashMap to serialize, plus testing
bbimber Dec 17, 2025
39737a0
Allow for space in HTML
bbimber Dec 18, 2025
be5a839
Add Single Cell 5' PE V1
bbimber Dec 18, 2025
48bd1eb
Improve logging in PerformTcrClustering
bbimber Dec 18, 2025
a9b3e88
Update filename case
bbimber Dec 18, 2025
e6146f0
Add VisualizeTcrDistances
bbimber Dec 18, 2025
16309ff
Support inclusion/exclusion lists for ApplyKnownClonotypicData
bbimber Dec 19, 2025
a90d209
Support additional methods for escape
bbimber Dec 20, 2025
2893060
Minor behavior change to SEURAT_MAX_THREADS
bbimber Dec 20, 2025
fccb116
Also support heatmapGroupingVars for escape
bbimber Dec 20, 2025
265ad09
Drop clustalw from main tools script
bbimber Dec 22, 2025
70058a9
Bugfix to serialization of Map<Long, Long>
bbimber Dec 22, 2025
431345b
Another bugfix to serialization of Map<Long, Long>
bbimber Dec 23, 2025
2950346
Better resume behavior for escape
bbimber Dec 25, 2025
1a1bd94
Allow tcrClustR to specify a subset of chains
bbimber Dec 25, 2025
1818264
More reporting for RunDecoupler
bbimber Dec 26, 2025
8c041d9
Improve cleanup of old columns
bbimber Dec 28, 2025
1e6256d
Change field defaults
bbimber Dec 28, 2025
65edb7e
Bugfix to ArchiveReadsetsAction
bbimber Dec 30, 2025
efcd54e
Allow whitespace in field
bbimber Dec 31, 2025
552d9ce
Support more clonotype filters
bbimber Dec 31, 2025
71834d1
Merge discvr-25.11 to develop
bbimber Dec 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions SequenceAnalysis/pipeline_code/extra_tools_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -379,3 +379,32 @@ then
else
echo "Already installed"
fi

#
#clustalw
#
# NOTE(review): this section downloads the GSLBiotech clustal-omega 1.2.4-cmake
# tarball, but the log text says "ClustalW" and the install line copies
# ./src/clustalw2. Clustal Omega's build produces ./src/clustalo, not clustalw2,
# so the install step below will likely fail -- confirm which tool is intended
# (GSLBiotech also hosts a clustal-w repository).
# NOTE(review): the "1.2.4-cmake" tag name suggests a CMake-based build, yet the
# build below runs ./configure && make -- verify the archive ships a configure
# script.

echo ""
echo ""
echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
echo "Installing ClustalW"
echo ""
cd $LKSRC_DIR

# Skip the build when clustalw2 is already installed, unless FORCE_REINSTALL is set.
if [[ ! -e ${LKTOOLS_DIR}/clustalw2 || ! -z $FORCE_REINSTALL ]];
then
# Remove any prior source trees/tarballs in $LKSRC_DIR and the old binary
# before re-downloading.
rm -Rf clustal*
rm -Rf 1.2.4-cmake.tar.gz
rm -Rf $LKTOOLS_DIR/clustalw2

wget $WGET_OPTS https://github.com/GSLBiotech/clustal-omega/archive/refs/tags/1.2.4-cmake.tar.gz
tar -xf 1.2.4-cmake.tar.gz
cd clustal-omega-1.2.4-cmake
./configure
make

install ./src/clustalw2 $LKTOOLS_DIR/clustalw2

else
echo "Already installed"
fi
31 changes: 0 additions & 31 deletions SequenceAnalysis/pipeline_code/sequence_tools_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -999,37 +999,6 @@ else
fi


#
#clustalw
#

echo ""
echo ""
echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
echo "Installing ClustalW"
echo ""
cd $LKSRC_DIR

if [[ ! -e ${LKTOOLS_DIR}/clustalw2 || ! -z $FORCE_REINSTALL ]];
then
rm -Rf clustalw-*
rm -Rf $LKTOOLS_DIR/clustalw2

wget $WGET_OPTS http://www.clustal.org/download/current/clustalw-2.1.tar.gz
gunzip clustalw-2.1.tar.gz
tar -xf clustalw-2.1.tar
gzip clustalw-2.1.tar
cd clustalw-2.1
./configure
make

install ./src/clustalw2 $LKTOOLS_DIR/clustalw2

else
echo "Already installed"
fi


#
#muscle
#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5069,6 +5069,7 @@ public ApiResponse execute(ArchiveReadsetsForm form, BindException errors) throw

Set<File> toDelete = new HashSet<>();
List<Map<String, Object>> toUpdate = new ArrayList<>();
Set<Long> encounteredReaddata = new HashSet<>();
for (ReadData rd : rs.getReadData())
{
if (rd.getSra_accession() == null)
Expand All @@ -5077,7 +5078,14 @@ public ApiResponse execute(ArchiveReadsetsForm form, BindException errors) throw
return null;
}

// A given ReadData can be referenced by multiple readsets
if (encounteredReaddata.contains(rd.getRowid()))
{
continue;
}

toUpdate.add(new CaseInsensitiveHashMap<>(Map.of("rowid", rd.getRowid(), "archived", true, "container", rd.getContainer())));
encounteredReaddata.add(rd.getRowid());

// File 1:
ExpData d1 = ExperimentService.get().getExpData(rd.getFileId1());
Expand Down Expand Up @@ -5131,7 +5139,6 @@ public ApiResponse execute(ArchiveReadsetsForm form, BindException errors) throw
{
List<Map<String, Object>> keys = new ArrayList<>();
toUpdate.forEach(row -> {

keys.add(new CaseInsensitiveHashMap<>(Map.of("rowid", row.get("rowid"))));
});

Expand All @@ -5141,7 +5148,7 @@ public ApiResponse execute(ArchiveReadsetsForm form, BindException errors) throw
}
catch (Exception e)
{
_log.error(e);
_log.error("Error archiving readsets", e);
errors.reject(ERROR_MSG, "Error archiving readset: " + readsetId + ", " + e.getMessage());
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,11 @@ public void testSerializeWithMap() throws Exception
js1._cachedObjects.put("cachedString", "foo");
js1._cachedObjects.put("cachedLong", 2L);

LongHashMap<Long> longMap = new LongHashMap<>();
longMap.put(1L, 2L);

js1._cachedObjects.put("cachedLongMap", longMap);

ObjectMapper mapper = PipelineJob.createObjectMapper();

StringWriter writer = new StringWriter();
Expand All @@ -341,10 +346,13 @@ public void testSerializeWithMap() throws Exception
//NOTE: this is not serializing properly. the keys are serialized as Strings
Map serializedMap = deserialized.getCachedObject("cachedMap", mapper.getTypeFactory().constructParametricType(Map.class, Integer.class, Integer.class));
assertEquals("Map not serialized properly", 1, serializedMap.size());

//TODO: determine if we can coax jackson into serializing these properly
assertEquals("Object not serialized with correct key type", Integer.class, serializedMap.keySet().iterator().next().getClass());
assertNotNull("Map keys not serialized properly", serializedMap.get(1));

LongHashMap<Long> serializedLongMap = (LongHashMap<Long>)deserialized.getCachedObject("cachedLongMap", LongHashMap.class);
assertEquals("LongMap not serialized properly", 1, serializedLongMap.size());
assertEquals("Object not serialized with correct key type", Long.class, serializedLongMap.keySet().iterator().next().getClass());
assertNotNull("LongMap keys not serialized properly", serializedLongMap.get(1L));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -318,14 +318,19 @@ public static void executeR(SequenceOutputHandler.JobContext ctx, String dockerC
throw new PipelineJobException(e);
}

if (seuratThreads != null)
Integer maxThreads = SequencePipelineService.get().getMaxThreads(ctx.getLogger());
if (seuratThreads == null && maxThreads != null)
{
Integer maxThreads = SequencePipelineService.get().getMaxThreads(ctx.getLogger());
if (maxThreads != null && maxThreads < seuratThreads)
{
seuratThreads = maxThreads;
}
seuratThreads = maxThreads;
}
else if (seuratThreads != null && maxThreads != null && maxThreads < seuratThreads)
{
ctx.getLogger().debug("Lowering SEURAT_MAX_THREADS based on the job settings, to: " + maxThreads);
seuratThreads = maxThreads;
}

if (seuratThreads != null)
{
wrapper.addToDockerEnvironment("SEURAT_MAX_THREADS", seuratThreads.toString());
}

Expand Down
2 changes: 1 addition & 1 deletion singlecell/resources/chunks/ApplyKnownClonotypicData.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ for (datasetId in names(seuratObjects)) {
printName(datasetId)
seuratObj <- readSeuratRDS(seuratObjects[[datasetId]])

seuratObj <- ApplyKnownClonotypicData(seuratObj)
seuratObj <- ApplyKnownClonotypicData(seuratObj, antigenInclusionList = antigenInclusionList, antigenExclusionList = antigenExclusionList, minActivationFrequency = minActivationFrequency)
saveData(seuratObj, datasetId)

# Cleanup
Expand Down
9 changes: 7 additions & 2 deletions singlecell/resources/chunks/PerformTcrClustering.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@ for (datasetId in names(seuratObjects)) {
printName(datasetId)
seuratObj <- readSeuratRDS(seuratObjects[[datasetId]])

print(paste0('Calculating distances for: ', datasetId))
seuratObj <- tcrClustR::CalculateTcrDistances(
inputData = seuratObj,
chains = c('TRA', 'TRB', 'TRG', 'TRD'),
chains = chains,
organism = organism,
minimumCloneSize = 2,
calculateChainPairs = TRUE
)

print('Performing TCR Clustering')
seuratObj <- tcrClustR::RunTcrClustering(
seuratObj_TCR = seuratObj,
dianaHeight = 20,
Expand All @@ -22,10 +24,13 @@ for (datasetId in names(seuratObjects)) {
} else {
for (an in names(seuratObj@misc$TCR_Distances)) {
ad <- seuratObj@misc$TCR_Distances[[an]]
print(paste0('Assay: ', an, ', total clones: ', nrow(ad)))
fn <- length(unique(seuratObj[[paste0(an, '_ClusterIdx')]]))
print(paste0('Assay: ', an, ', total clones: ', nrow(ad), '. Distinct families: ', fn))
}
}

VisualizeTcrDistances(seuratObj)

saveData(seuratObj, datasetId)

# Cleanup
Expand Down
8 changes: 8 additions & 0 deletions singlecell/resources/chunks/PredictTcellActivation.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@ for (datasetId in names(seuratObjects)) {
printName(datasetId)
seuratObj <- readSeuratRDS(seuratObjects[[datasetId]])

# Drop any pre-existing sPLS-derived metadata columns so stale values from a
# previous PredictTcellActivation run cannot linger alongside the new results.
toDrop <- grep(names(seuratObj@meta.data), pattern = "sPLS", value = TRUE)
if (length(toDrop) > 0) {
  print(paste0('Dropping pre-existing columns: ', paste0(toDrop, collapse = ', ')))
  for (colName in toDrop) {
    # Bugfix: the original assigned seuratObj[[toDrop]] <- NULL, indexing with
    # the whole vector of matched names on every iteration, which misbehaves
    # when more than one column matches. Remove one column per iteration using
    # the loop variable instead.
    seuratObj[[colName]] <- NULL
  }
}

seuratObj <- RIRA::PredictTcellActivation(seuratObj)

saveData(seuratObj, datasetId)
Expand Down
5 changes: 5 additions & 0 deletions singlecell/resources/chunks/RunDecoupler.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ for (datasetId in names(seuratObjects)) {
seuratObj <- readSeuratRDS(seuratObjects[[datasetId]])

seuratObj <- CellMembrane::RunDecoupleR(seuratObj)
# Only generate grouped TF plots when grouping variables were supplied;
# an all-NA vector is the "not provided" sentinel here.
if (!all(is.na(heatmapGroupingVars))) {
for (heatmapGroupingVar in heatmapGroupingVars) {
# NOTE(review): expression results are not auto-printed inside a for loop in R;
# if PlotTfData returns a plot object rather than printing internally, this
# call should be wrapped in print() for the plot to render -- confirm
# PlotTfData's behavior.
PlotTfData(seuratObj, groupField = heatmapGroupingVar)
}
}

saveData(seuratObj, datasetId)

Expand Down
41 changes: 34 additions & 7 deletions singlecell/resources/chunks/RunEscape.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,41 @@ if (Sys.getenv('SEURAT_MAX_THREADS') != '') {
}

# For each input Seurat object, run escape gene-set scoring one MSigDB gene set
# at a time, checkpointing the cumulative object to an RDS file after each set
# so an interrupted job can resume without recomputing completed sets.
for (datasetId in names(seuratObjects)) {
printName(datasetId)
seuratObj <- readSeuratRDS(seuratObjects[[datasetId]])

# Checkpoint files created/consumed during this dataset; removed after success.
toDelete <- c()

# Use RunEscape's own default msigdbGeneSets value as the list of sets to
# process, so this chunk stays in sync with CellMembrane's defaults.
vals <- eval(formals(CellMembrane::RunEscape)$msigdbGeneSets)
for (idx in seq_along(vals)) {
geneSetName <- names(vals)[idx]
geneSet <- vals[[idx]]
logger::log_info(paste0('Processing: ', geneSetName, ' / ', geneSet))

# Checkpoint filename; unnamed gene sets fall back to the set value itself.
fn <- paste0('escape.', datasetId, '.', ifelse(geneSetName == '', yes = geneSet, no = geneSetName), '.rds')
if (file.exists(fn)) {
# Resume: the checkpoint holds the object cumulative through this gene set,
# so reading it replaces seuratObj and skips the RunEscape call.
logger::log_info(paste0('resuming: ', fn))
seuratObj <- readRDS(fn)
toDelete <- c(toDelete, fn)
} else {
# Build a (possibly named) single-element gene set argument for RunEscape.
msigdbGeneSets <- geneSet
if (geneSetName != '') {
names(msigdbGeneSets) <- geneSetName
}

seuratObj <- CellMembrane::RunEscape(seuratObj, msigdbGeneSets = msigdbGeneSets, outputAssayBaseName = outputAssayBaseName, doPlot = TRUE, heatmapGroupingVars = heatmapGroupingVars, performDimRedux = performDimRedux, escapeMethod = escapeMethod, nCores = nCores)
# Persist the cumulative result so later gene sets / resumed jobs build on it.
saveRDS(seuratObj, file = fn)
toDelete <- c(toDelete, fn)
}
}

# All gene sets succeeded for this dataset: checkpoints are no longer needed.
for(fn in toDelete) {
unlink(fn)
}

saveData(seuratObj, datasetId)

# Cleanup
rm(seuratObj)
gc()
}
11 changes: 10 additions & 1 deletion singlecell/resources/chunks/UpdateSeuratPrototype.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,20 @@ for (datasetId in names(seuratObjects)) {
}

if (scoreActivation) {
# Drop existing sPLS-derived metadata columns so stale values from a prior
# activation-scoring run cannot linger alongside the new predictions:
toDrop <- grep(names(seuratObj@meta.data), pattern = "sPLS", value = TRUE)
if (length(toDrop) > 0) {
print(paste0('Dropping pre-existing columns: ', paste0(toDrop, collapse = ', ')))
for (colName in toDrop) {
# Bugfix: the original assigned seuratObj[[toDrop]] <- NULL, indexing with
# the full vector of matched names each iteration; use the loop variable so
# exactly one column is removed per pass.
seuratObj[[colName]] <- NULL
}
}

seuratObj <- RIRA::PredictTcellActivation(seuratObj)
}

if (recalculateUCells) {
seuratObj <- RIRA::CalculateUCellScores(seuratObj, storeRanks = FALSE, assayName = 'RNA', forceRecalculate = TRUE, ncores = nCores)
seuratObj <- RIRA::CalculateUCellScores(seuratObj, storeRanks = FALSE, assayName = 'RNA', forceRecalculate = TRUE, ncores = nCores, dropAllExistingUcells = TRUE)
}

saveData(seuratObj, datasetId)
Expand Down
Loading