Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
fdc468e
Use LongHashMap
bbimber Dec 17, 2025
4c1afe3
Use LongHashMap to deserialize
bbimber Dec 17, 2025
87dffd7
Use LongHashMap to serialize, plus testing
bbimber Dec 17, 2025
39737a0
Allow for space in HTML
bbimber Dec 18, 2025
be5a839
Add Single Cell 5' PE V1
bbimber Dec 18, 2025
48bd1eb
Improve logging in PerformTcrClustering
bbimber Dec 18, 2025
a9b3e88
Update filename case
bbimber Dec 18, 2025
e6146f0
Add VisualizeTcrDistances
bbimber Dec 18, 2025
16309ff
Support inclusion/exclusion lists for ApplyKnownClonotypicData
bbimber Dec 19, 2025
a90d209
Support additional methods for escape
bbimber Dec 20, 2025
2893060
Minor behavior change to SEURAT_MAX_THREADS
bbimber Dec 20, 2025
fccb116
Also support heatmapGroupingVars for escape
bbimber Dec 20, 2025
265ad09
Drop clustalw from main tools script
bbimber Dec 22, 2025
70058a9
Bugfix to serialization of Map<Long, Long>
bbimber Dec 22, 2025
431345b
Another bugfix to serialization of Map<Long, Long>
bbimber Dec 23, 2025
2950346
Better resume behavior for escape
bbimber Dec 25, 2025
1a1bd94
Allow tcrClustR to specify a subset of chains
bbimber Dec 25, 2025
1818264
More reporting for RunDecoupler
bbimber Dec 26, 2025
8c041d9
Improve cleanup of old columns
bbimber Dec 28, 2025
1e6256d
Change field defaults
bbimber Dec 28, 2025
65edb7e
Bugfix to ArchiveReadsetsAction
bbimber Dec 30, 2025
efcd54e
Allow whitespace in field
bbimber Dec 31, 2025
552d9ce
Support more clonotype filters
bbimber Dec 31, 2025
71834d1
Merge discvr-25.11 to develop
bbimber Dec 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions SequenceAnalysis/pipeline_code/extra_tools_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -379,3 +379,32 @@ then
else
echo "Already installed"
fi

#
#clustalw
#
# NOTE(review): this section downloads the GSLBiotech clustal-omega 1.2.4-cmake
# tarball, but the log text says "ClustalW" and the install line copies
# ./src/clustalw2. Clustal Omega's build produces ./src/clustalo, not clustalw2,
# so the install step below will likely fail -- confirm which tool is intended
# (GSLBiotech also hosts a clustal-w repository).
# NOTE(review): the "1.2.4-cmake" tag name suggests a CMake-based build, yet the
# build below runs ./configure && make -- verify the archive ships a configure
# script.

echo ""
echo ""
echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
echo "Installing ClustalW"
echo ""
cd $LKSRC_DIR

# Skip the build when clustalw2 is already installed, unless FORCE_REINSTALL is set.
if [[ ! -e ${LKTOOLS_DIR}/clustalw2 || ! -z $FORCE_REINSTALL ]];
then
# Remove any prior source trees/tarballs in $LKSRC_DIR and the old binary
# before re-downloading.
rm -Rf clustal*
rm -Rf 1.2.4-cmake.tar.gz
rm -Rf $LKTOOLS_DIR/clustalw2

wget $WGET_OPTS https://github.com/GSLBiotech/clustal-omega/archive/refs/tags/1.2.4-cmake.tar.gz
tar -xf 1.2.4-cmake.tar.gz
cd clustal-omega-1.2.4-cmake
./configure
make

install ./src/clustalw2 $LKTOOLS_DIR/clustalw2

else
echo "Already installed"
fi
31 changes: 0 additions & 31 deletions SequenceAnalysis/pipeline_code/sequence_tools_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -999,37 +999,6 @@ else
fi


#
#clustalw
#

echo ""
echo ""
echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
echo "Installing ClustalW"
echo ""
cd $LKSRC_DIR

if [[ ! -e ${LKTOOLS_DIR}/clustalw2 || ! -z $FORCE_REINSTALL ]];
then
rm -Rf clustalw-*
rm -Rf $LKTOOLS_DIR/clustalw2

wget $WGET_OPTS http://www.clustal.org/download/current/clustalw-2.1.tar.gz
gunzip clustalw-2.1.tar.gz
tar -xf clustalw-2.1.tar
gzip clustalw-2.1.tar
cd clustalw-2.1
./configure
make

install ./src/clustalw2 $LKTOOLS_DIR/clustalw2

else
echo "Already installed"
fi


#
#muscle
#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5069,6 +5069,7 @@ public ApiResponse execute(ArchiveReadsetsForm form, BindException errors) throw

Set<File> toDelete = new HashSet<>();
List<Map<String, Object>> toUpdate = new ArrayList<>();
Set<Long> encounteredReaddata = new HashSet<>();
for (ReadData rd : rs.getReadData())
{
if (rd.getSra_accession() == null)
Expand All @@ -5077,7 +5078,14 @@ public ApiResponse execute(ArchiveReadsetsForm form, BindException errors) throw
return null;
}

// A given ReadData can be referenced by multiple readsets
if (encounteredReaddata.contains(rd.getRowid()))
{
continue;
}

toUpdate.add(new CaseInsensitiveHashMap<>(Map.of("rowid", rd.getRowid(), "archived", true, "container", rd.getContainer())));
encounteredReaddata.add(rd.getRowid());

// File 1:
ExpData d1 = ExperimentService.get().getExpData(rd.getFileId1());
Expand Down Expand Up @@ -5131,7 +5139,6 @@ public ApiResponse execute(ArchiveReadsetsForm form, BindException errors) throw
{
List<Map<String, Object>> keys = new ArrayList<>();
toUpdate.forEach(row -> {

keys.add(new CaseInsensitiveHashMap<>(Map.of("rowid", row.get("rowid"))));
});

Expand All @@ -5141,7 +5148,7 @@ public ApiResponse execute(ArchiveReadsetsForm form, BindException errors) throw
}
catch (Exception e)
{
_log.error(e);
_log.error("Error archiving readsets", e);
errors.reject(ERROR_MSG, "Error archiving readset: " + readsetId + ", " + e.getMessage());
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,11 @@ public void testSerializeWithMap() throws Exception
js1._cachedObjects.put("cachedString", "foo");
js1._cachedObjects.put("cachedLong", 2L);

LongHashMap<Long> longMap = new LongHashMap<>();
longMap.put(1L, 2L);

js1._cachedObjects.put("cachedLongMap", longMap);

ObjectMapper mapper = PipelineJob.createObjectMapper();

StringWriter writer = new StringWriter();
Expand All @@ -341,10 +346,13 @@ public void testSerializeWithMap() throws Exception
//NOTE: this is not serializing properly. the keys are serialized as Strings
Map serializedMap = deserialized.getCachedObject("cachedMap", mapper.getTypeFactory().constructParametricType(Map.class, Integer.class, Integer.class));
assertEquals("Map not serialized properly", 1, serializedMap.size());

//TODO: determine if we can coax jackson into serializing these properly
assertEquals("Object not serialized with correct key type", Integer.class, serializedMap.keySet().iterator().next().getClass());
assertNotNull("Map keys not serialized properly", serializedMap.get(1));

LongHashMap<Long> serializedLongMap = (LongHashMap<Long>)deserialized.getCachedObject("cachedLongMap", LongHashMap.class);
assertEquals("LongMap not serialized properly", 1, serializedLongMap.size());
assertEquals("Object not serialized with correct key type", Long.class, serializedLongMap.keySet().iterator().next().getClass());
assertNotNull("LongMap keys not serialized properly", serializedLongMap.get(1L));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -318,14 +318,19 @@ public static void executeR(SequenceOutputHandler.JobContext ctx, String dockerC
throw new PipelineJobException(e);
}

if (seuratThreads != null)
Integer maxThreads = SequencePipelineService.get().getMaxThreads(ctx.getLogger());
if (seuratThreads == null && maxThreads != null)
{
Integer maxThreads = SequencePipelineService.get().getMaxThreads(ctx.getLogger());
if (maxThreads != null && maxThreads < seuratThreads)
{
seuratThreads = maxThreads;
}
seuratThreads = maxThreads;
}
else if (seuratThreads != null && maxThreads != null && maxThreads < seuratThreads)
{
ctx.getLogger().debug("Lowering SEURAT_MAX_THREADS based on the job settings, to: " + maxThreads);
seuratThreads = maxThreads;
}

if (seuratThreads != null)
{
wrapper.addToDockerEnvironment("SEURAT_MAX_THREADS", seuratThreads.toString());
}

Expand Down
2 changes: 1 addition & 1 deletion singlecell/resources/chunks/ApplyKnownClonotypicData.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ for (datasetId in names(seuratObjects)) {
printName(datasetId)
seuratObj <- readSeuratRDS(seuratObjects[[datasetId]])

seuratObj <- ApplyKnownClonotypicData(seuratObj)
seuratObj <- ApplyKnownClonotypicData(seuratObj, antigenInclusionList = antigenInclusionList, antigenExclusionList = antigenExclusionList, minActivationFrequency = minActivationFrequency)
saveData(seuratObj, datasetId)

# Cleanup
Expand Down
9 changes: 7 additions & 2 deletions singlecell/resources/chunks/PerformTcrClustering.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@ for (datasetId in names(seuratObjects)) {
printName(datasetId)
seuratObj <- readSeuratRDS(seuratObjects[[datasetId]])

print(paste0('Calculating distances for: ', datasetId))
seuratObj <- tcrClustR::CalculateTcrDistances(
inputData = seuratObj,
chains = c('TRA', 'TRB', 'TRG', 'TRD'),
chains = chains,
organism = organism,
minimumCloneSize = 2,
calculateChainPairs = TRUE
)

print('Performing TCR Clustering')
seuratObj <- tcrClustR::RunTcrClustering(
seuratObj_TCR = seuratObj,
dianaHeight = 20,
Expand All @@ -22,10 +24,13 @@ for (datasetId in names(seuratObjects)) {
} else {
for (an in names(seuratObj@misc$TCR_Distances)) {
ad <- seuratObj@misc$TCR_Distances[[an]]
print(paste0('Assay: ', an, ', total clones: ', nrow(ad)))
fn <- length(unique(seuratObj[[paste0(an, '_ClusterIdx')]]))
print(paste0('Assay: ', an, ', total clones: ', nrow(ad), '. Distinct families: ', fn))
}
}

VisualizeTcrDistances(seuratObj)

saveData(seuratObj, datasetId)

# Cleanup
Expand Down
8 changes: 8 additions & 0 deletions singlecell/resources/chunks/PredictTcellActivation.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@ for (datasetId in names(seuratObjects)) {
printName(datasetId)
seuratObj <- readSeuratRDS(seuratObjects[[datasetId]])

# Drop any pre-existing sPLS-derived metadata columns so stale values from a
# previous PredictTcellActivation run cannot linger alongside the new results.
toDrop <- grep(names(seuratObj@meta.data), pattern = "sPLS", value = TRUE)
if (length(toDrop) > 0) {
  print(paste0('Dropping pre-existing columns: ', paste0(toDrop, collapse = ', ')))
  for (colName in toDrop) {
    # Bugfix: the original assigned seuratObj[[toDrop]] <- NULL, indexing with
    # the whole vector of matched names on every iteration, which misbehaves
    # when more than one column matches. Remove one column per iteration using
    # the loop variable instead.
    seuratObj[[colName]] <- NULL
  }
}

seuratObj <- RIRA::PredictTcellActivation(seuratObj)

saveData(seuratObj, datasetId)
Expand Down
5 changes: 5 additions & 0 deletions singlecell/resources/chunks/RunDecoupler.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ for (datasetId in names(seuratObjects)) {
seuratObj <- readSeuratRDS(seuratObjects[[datasetId]])

seuratObj <- CellMembrane::RunDecoupleR(seuratObj)
# Only generate grouped TF plots when grouping variables were supplied;
# an all-NA vector is the "not provided" sentinel here.
if (!all(is.na(heatmapGroupingVars))) {
for (heatmapGroupingVar in heatmapGroupingVars) {
# NOTE(review): expression results are not auto-printed inside a for loop in R;
# if PlotTfData returns a plot object rather than printing internally, this
# call should be wrapped in print() for the plot to render -- confirm
# PlotTfData's behavior.
PlotTfData(seuratObj, groupField = heatmapGroupingVar)
}
}

saveData(seuratObj, datasetId)

Expand Down
41 changes: 34 additions & 7 deletions singlecell/resources/chunks/RunEscape.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,41 @@ if (Sys.getenv('SEURAT_MAX_THREADS') != '') {
}

# For each input Seurat object, run escape gene-set scoring one MSigDB gene set
# at a time, checkpointing the cumulative object to an RDS file after each set
# so an interrupted job can resume without recomputing completed sets.
for (datasetId in names(seuratObjects)) {
printName(datasetId)
seuratObj <- readSeuratRDS(seuratObjects[[datasetId]])

# Checkpoint files created/consumed during this dataset; removed after success.
toDelete <- c()

# Use RunEscape's own default msigdbGeneSets value as the list of sets to
# process, so this chunk stays in sync with CellMembrane's defaults.
vals <- eval(formals(CellMembrane::RunEscape)$msigdbGeneSets)
for (idx in seq_along(vals)) {
geneSetName <- names(vals)[idx]
geneSet <- vals[[idx]]
logger::log_info(paste0('Processing: ', geneSetName, ' / ', geneSet))

# Checkpoint filename; unnamed gene sets fall back to the set value itself.
fn <- paste0('escape.', datasetId, '.', ifelse(geneSetName == '', yes = geneSet, no = geneSetName), '.rds')
if (file.exists(fn)) {
# Resume: the checkpoint holds the object cumulative through this gene set,
# so reading it replaces seuratObj and skips the RunEscape call.
logger::log_info(paste0('resuming: ', fn))
seuratObj <- readRDS(fn)
toDelete <- c(toDelete, fn)
} else {
# Build a (possibly named) single-element gene set argument for RunEscape.
msigdbGeneSets <- geneSet
if (geneSetName != '') {
names(msigdbGeneSets) <- geneSetName
}

seuratObj <- CellMembrane::RunEscape(seuratObj, msigdbGeneSets = msigdbGeneSets, outputAssayBaseName = outputAssayBaseName, doPlot = TRUE, heatmapGroupingVars = heatmapGroupingVars, performDimRedux = performDimRedux, escapeMethod = escapeMethod, nCores = nCores)
# Persist the cumulative result so later gene sets / resumed jobs build on it.
saveRDS(seuratObj, file = fn)
toDelete <- c(toDelete, fn)
}
}

# All gene sets succeeded for this dataset: checkpoints are no longer needed.
for(fn in toDelete) {
unlink(fn)
}

saveData(seuratObj, datasetId)

# Cleanup
rm(seuratObj)
gc()
}
11 changes: 10 additions & 1 deletion singlecell/resources/chunks/UpdateSeuratPrototype.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,20 @@ for (datasetId in names(seuratObjects)) {
}

if (scoreActivation) {
# Drop existing sPLS-derived metadata columns so stale values from a prior
# activation-scoring run cannot linger alongside the new predictions:
toDrop <- grep(names(seuratObj@meta.data), pattern = "sPLS", value = TRUE)
if (length(toDrop) > 0) {
print(paste0('Dropping pre-existing columns: ', paste0(toDrop, collapse = ', ')))
for (colName in toDrop) {
# Bugfix: the original assigned seuratObj[[toDrop]] <- NULL, indexing with
# the full vector of matched names each iteration; use the loop variable so
# exactly one column is removed per pass.
seuratObj[[colName]] <- NULL
}
}

seuratObj <- RIRA::PredictTcellActivation(seuratObj)
}

if (recalculateUCells) {
seuratObj <- RIRA::CalculateUCellScores(seuratObj, storeRanks = FALSE, assayName = 'RNA', forceRecalculate = TRUE, ncores = nCores)
seuratObj <- RIRA::CalculateUCellScores(seuratObj, storeRanks = FALSE, assayName = 'RNA', forceRecalculate = TRUE, ncores = nCores, dropAllExistingUcells = TRUE)
}

saveData(seuratObj, datasetId)
Expand Down
Loading