diff --git a/PMR/resources/etls/pmr-datasets.xml b/PMR/resources/etls/pmr-datasets.xml index 19e981726..1186b27a4 100644 --- a/PMR/resources/etls/pmr-datasets.xml +++ b/PMR/resources/etls/pmr-datasets.xml @@ -141,6 +141,8 @@ relationship method objectid + created + modified @@ -171,6 +173,8 @@ conception conceptualDay objectid + created + modified diff --git a/PMR/resources/etls/pmr-demographics.xml b/PMR/resources/etls/pmr-demographics.xml index 9c06b7b3c..03b2bf94e 100644 --- a/PMR/resources/etls/pmr-demographics.xml +++ b/PMR/resources/etls/pmr-demographics.xml @@ -17,6 +17,8 @@ calculated_status QCState/Label objectid + created + modified diff --git a/mGAP/resources/data/species.tsv b/mGAP/resources/data/species.tsv new file mode 100644 index 000000000..2472af328 --- /dev/null +++ b/mGAP/resources/data/species.tsv @@ -0,0 +1,10 @@ +common_name scientific_name mhc_prefix +Cotton-top Tamarin Saguinus oedipus Saoe +Cynomolgus macaque Macaca fascicularis Mafa +Marmoset Callithrix jacchus Caja +Pigtail macaque Macaca nemestrina Mane +Rhesus macaque Macaca mulatta Mamu +Sooty Mangabey Cercocebus atys Ceat +Stump Tailed Macaca Arctoides Maar +Vervet Chlorocebus sabaeus Chsa +Japanese macaque Macaca fuscata Mafu diff --git a/mGAP/resources/etls/prime-seq.xml b/mGAP/resources/etls/prime-seq.xml index 8b2bcecc9..312f0d033 100644 --- a/mGAP/resources/etls/prime-seq.xml +++ b/mGAP/resources/etls/prime-seq.xml @@ -53,12 +53,16 @@ releaseId trackName label + species totalSamples category url source description isprimarytrack + shouldindex + vcfIndexId/dataid/DataFileUrl + vcfIndexId/library_id/name vcfId/dataid/DataFileUrl vcfId/library_id/name @@ -66,6 +70,7 @@ + @@ -101,6 +106,7 @@ releaseDate vcfId/dataid/DataFileUrl genomeId/name + species totalSubjects totalVariants dbSnpId diff --git a/mGAP/resources/queries/mGAP/combinedPedigree.sql b/mGAP/resources/queries/mGAP/combinedPedigree.sql index 55512965a..4725d8998 100644 --- a/mGAP/resources/queries/mGAP/combinedPedigree.sql +++ b/mGAP/resources/queries/mGAP/combinedPedigree.sql @@ -1,12 +1,12 @@ SELECT - s.subjectname, + s.Id as subjectname, s.gender, - s.mother as dam, - s.father as sire, + s.Id.parents.dam as dam, + s.Id.parents.sire as sire, s.species, - s.geographic_origin, + s.geographic_origin -FROM laboratory.subjects s +FROM "/Internal/PMR/".study.demographics s UNION ALL @@ -19,4 +19,4 @@ SELECT null as geographic_origin FROM mgap.demographics d -WHERE d.subjectname NOT IN (SELECT DISTINCT s.subjectname FROM laboratory.subjects s) \ No newline at end of file +WHERE d.subjectname NOT IN (SELECT DISTINCT s.Id FROM "/Internal/PMR/".study.demographics s) \ No newline at end of file diff --git a/mGAP/resources/queries/mGAP/releaseTracks/.qview.xml b/mGAP/resources/queries/mGAP/releaseTracks/.qview.xml index 08488c2ec..7de86c8ae 100644 --- a/mGAP/resources/queries/mGAP/releaseTracks/.qview.xml +++ b/mGAP/resources/queries/mGAP/releaseTracks/.qview.xml @@ -3,6 +3,7 @@ + @@ -10,7 +11,6 @@ - diff --git a/mGAP/resources/queries/mGAP/sampleSummary.query.xml b/mGAP/resources/queries/mGAP/sampleSummary.query.xml index 4d9e68f7c..86a7a599e 100644 --- a/mGAP/resources/queries/mGAP/sampleSummary.query.xml +++ b/mGAP/resources/queries/mGAP/sampleSummary.query.xml @@ -2,8 +2,15 @@ + subjectName mGAP Subject/gVCF Summary + + + SubjectId Listed In Alias Table + true + +
diff --git a/mGAP/resources/queries/mGAP/sampleSummary.sql b/mGAP/resources/queries/mGAP/sampleSummary.sql index 385533ddf..d45c1c4bb 100644 --- a/mGAP/resources/queries/mGAP/sampleSummary.sql +++ b/mGAP/resources/queries/mGAP/sampleSummary.sql @@ -8,7 +8,8 @@ SELECT ss.center, t.tracks, t.total, - CASE WHEN ss.originalId IS NULL OR ss.gender IS NULL or ss.species IS NULL or ss.center IS NULL THEN true ELSE false END as missingDemographics + CASE WHEN ss.originalId IS NULL OR ss.gender IS NULL or ss.species IS NULL or ss.center IS NULL THEN true ELSE false END as missingDemographics, + am.subjectname as aliasSubjectName FROM (SELECT COALESCE(o.readset.subjectId, rt.subjectId) as subjectId, diff --git a/mGAP/resources/queries/mGAP/sampleSummary/.qview.xml b/mGAP/resources/queries/mGAP/sampleSummary/.qview.xml new file mode 100644 index 000000000..48de68347 --- /dev/null +++ b/mGAP/resources/queries/mGAP/sampleSummary/.qview.xml @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/mGAP/resources/queries/mGAP/variantCatalogReleases/.qview.xml b/mGAP/resources/queries/mGAP/variantCatalogReleases/.qview.xml index f8b614ee2..425728879 100644 --- a/mGAP/resources/queries/mGAP/variantCatalogReleases/.qview.xml +++ b/mGAP/resources/queries/mGAP/variantCatalogReleases/.qview.xml @@ -1,6 +1,7 @@ + diff --git a/mGAP/resources/r/UpdateTracks.r b/mGAP/resources/r/UpdateTracks.r new file mode 100644 index 000000000..8646b5cb0 --- /dev/null +++ b/mGAP/resources/r/UpdateTracks.r @@ -0,0 +1,115 @@ +library(Rlabkey) +library(dplyr) + +# This script is designed to be run externally per release, to identify subject that need to be added to the releaseTrackSubsets table: + +testByCenter <- function(centerName, trackName) { + dat <- suppressWarnings(labkey.selectRows( + baseUrl="https://prime-seq.ohsu.edu", + folderPath="/Internal/ColonyData", + schemaName="mgap", + queryName="sampleSummary", + viewName="", + colSelect="subjectId,externalAlias", + colFilter=makeFilter( + c("tracks", "DOES_NOT_CONTAIN", trackName), + c("center", "EQUAL", centerName)), + containerFilter=NULL, + colNameOpt="rname" + )) + + print(paste0(trackName, ': ', nrow(dat))) + + if (nrow(dat) == 0) { + return(NULL) + } + + return(data.frame(trackName = trackName, subjectId = dat$subjectid)) +} + +testBySpecies <- function(speciesList, trackName) { + dat <- suppressWarnings(labkey.selectRows( + baseUrl="https://prime-seq.ohsu.edu", + folderPath="/Internal/ColonyData", + schemaName="mgap", + queryName="sampleSummary", + viewName="", + colSelect="subjectId,externalAlias", + colFilter=makeFilter( + c("tracks", "DOES_NOT_CONTAIN", trackName), + c("species", "IN", paste0(speciesList, collapse = ';'))), + containerFilter=NULL, + colNameOpt="rname" + )) + + print(paste0(trackName, ': ', nrow(dat))) + + if (nrow(dat) == 0) { + return(NULL) + } + + return(data.frame(trackName = trackName, subjectId = dat$subjectid)) +} + +toInsert <- rbind( + testByCenter('CNPRC', 'CNPRC Animals'), + testByCenter('TNPRC', 'TNPRC Animals'), + testByCenter('ENPRC', 'ENPRC Animals'), + testByCenter('NEPRC', 'NEPRC Animals'), + testByCenter('SNPRC', 'SNPRC Animals'), + testByCenter('ONPRC', 'ONPRC Animals'), + testByCenter('MDA', 'MDA Animals'), + testByCenter('WFU', 'WFU Animals'), + testByCenter('CPRC', 'CPRC Animals'), + testBySpecies(c('RHESUS MACAQUE', 'Rhesus', 'Macaca mulatta'), 'Rhesus Macaques'), + testBySpecies(c('JAPANESE MACAQUE', 'Macaca fuscata'), 'Japanese Macaques') +) + + +if (FALSE) { + added <- labkey.insertRows( + baseUrl="https://prime-seq.ohsu.edu", + folderPath="/Internal/ColonyData", + schemaName="mgap", + queryName="releaseTrackSubsets", + toInsert = toInsert + ) +} + + +# Now ensure all tracks exist: +existingTracks <- labkey.selectRows( + baseUrl="https://prime-seq.ohsu.edu", + folderPath="/Internal/ColonyData", + schemaName="mgap", + queryName="releaseTracks", + colNameOpt="rname" +) + +missingTrackNames <- labkey.selectRows( + baseUrl="https://prime-seq.ohsu.edu", + folderPath="/Internal/ColonyData", + schemaName="mgap", + queryName="releaseTrackSubsets", + colSelect="trackName", + colNameOpt="rname" +) %>% + filter(!trackname %in% existingTracks$trackname) %>% + select(trackname) %>% unique() + +if (nrow(missingTrackNames) > 0) { + toAdd <- data.frame(trackName = missingTrackNames$trackname, label = missingTrackNames$trackname, isprimarytrack = FALSE) + toAdd$Category <- 'Species Dataset' + # Add anything else desired, like species, source, url, description, category + + if (FALSE) { + added <- labkey.insertRows( + baseUrl="https://prime-seq.ohsu.edu", + folderPath="/Internal/ColonyData", + schemaName="mgap", + queryName="releaseTracks", + toInsert = toAdd + ) + } +} + diff --git a/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.73-16.74.sql b/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.73-16.74.sql new file mode 100644 index 000000000..e4f4faba8 --- /dev/null +++ b/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.73-16.74.sql @@ -0,0 +1,3 @@ +ALTER TABLE mGAP.variantCatalogReleases ADD species varchar(1000); +ALTER TABLE mGAP.releaseTracks ADD species varchar(1000); +ALTER TABLE mGAP.releaseTracks DROP COLUMN mergepriority; \ No newline at end of file diff --git a/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.74-16.75.sql b/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.74-16.75.sql new file mode 100644 index 000000000..7080ea1bc --- /dev/null +++ b/mGAP/resources/schemas/dbscripts/postgresql/mgap-16.74-16.75.sql @@ -0,0 +1,5 @@ +ALTER TABLE mGAP.releaseTracks ADD shouldindex boolean default false; +ALTER TABLE mGAP.releaseTracks ADD vcfIndexId int; + +ALTER TABLE mGAP.tracksPerRelease ADD shouldindex boolean default false; +ALTER TABLE mGAP.tracksPerRelease ADD vcfIndexId int; \ No newline at end of file diff --git a/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.73-16.74.sql b/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.73-16.74.sql new file mode 100644 index 000000000..e4f4faba8 --- /dev/null +++ b/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.73-16.74.sql @@ -0,0 +1,3 @@ +ALTER TABLE mGAP.variantCatalogReleases ADD species varchar(1000); +ALTER TABLE mGAP.releaseTracks ADD species varchar(1000); +ALTER TABLE mGAP.releaseTracks DROP COLUMN mergepriority; \ No newline at end of file diff --git a/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.74-16.75.sql b/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.74-16.75.sql new file mode 100644 index 000000000..39631f183 --- /dev/null +++ b/mGAP/resources/schemas/dbscripts/sqlserver/mgap-16.74-16.75.sql @@ -0,0 +1,5 @@ +ALTER TABLE mGAP.releaseTracks ADD shouldindex bit default 0; +ALTER TABLE mGAP.releaseTracks ADD vcfIndexId int; + +ALTER TABLE mGAP.tracksPerRelease ADD shouldindex bit default 0; +ALTER TABLE mGAP.tracksPerRelease ADD vcfIndexId int; \ No newline at end of file diff --git a/mGAP/resources/schemas/mgap.xml b/mGAP/resources/schemas/mgap.xml index 24bfd6332..caed3fa92 100644 --- a/mGAP/resources/schemas/mgap.xml +++ b/mGAP/resources/schemas/mgap.xml @@ -95,6 +95,15 @@ false Row Id + + Species + + laboratory + species + common_name + + + Version false @@ -644,13 +653,22 @@ Label false + + Species + + laboratory + species + common_name + + + Source ${url} Category - false + true URL @@ -669,14 +687,21 @@ textarea - Is Primary Track? - - - Merge Priority Order - This order will be used for genotype priority order when merging to create the primary release VCF. Lower numbers have higher priority. Set to -1 to exclude this track when merging. + Is Primary Track For Species? - Skip Validation? + Skip Annotation Checks? + + + Should Include Lucene Index? + + + Lucene Index Id + + sequenceanalysis + outputfiles + rowid + true @@ -812,7 +837,7 @@ Category - false + true URL @@ -831,9 +856,20 @@ textarea - Is Primary Track? + Is Primary Track For Species? false + + Should Include Lucene Index? + + + Lucene Index Id + + sequenceanalysis + outputfiles + rowid + + true diff --git a/mGAP/resources/views/mgapDataDashboard.html b/mGAP/resources/views/mgapDataDashboard.html index fc6ce214d..1b0fd9138 100644 --- a/mGAP/resources/views/mgapDataDashboard.html +++ b/mGAP/resources/views/mgapDataDashboard.html @@ -76,6 +76,13 @@ queryName: 'sampleSummary', 'query.externalAlias~isblank': '' }) + },{ + name: 'gVCFs With SubjectId / Case-sensitive Difference', + url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, { + schemaName: 'mgap', + queryName: 'subjectCaseMismatch', + 'query.externalAlias~isnonblank': '' + }) }] },{ header: 'Prior Releases', diff --git a/mGAP/resources/web/mGAP/window/ReleaseWindow.js b/mGAP/resources/web/mGAP/window/ReleaseWindow.js index 9921c9746..51fe467b0 100644 --- a/mGAP/resources/web/mGAP/window/ReleaseWindow.js +++ b/mGAP/resources/web/mGAP/window/ReleaseWindow.js @@ -12,22 +12,28 @@ Ext4.define('mGAP.window.ReleaseWindow', { schemaName: 'mgap', queryName: 'releaseTracks', scope: this, - columns: 'vcfId,trackName,vcfId/library_id,isprimarytrack', + columns: 'vcfId,species,trackName,vcfId/library_id,isprimarytrack', failure: LDK.Utils.getErrorCallback(), success: function (results) { Ext4.Msg.hide(); var outputFiles = []; - var distinctGenomes = []; + var distinctGenomesBySpecies = {}; Ext4.Array.forEach(results.rows, function(r){ - if (r.vcfId) { - outputFiles.push(r.vcfId); + if (!r.vcfId) { + Ext4.Msg.alert('Error', 'Track lacks VCF ID: ' + r.trackName); + return false; + } - if (r['vcfId/library_id']) { - distinctGenomes.push(r['vcfId/library_id']); - } + if (!r.species) { + Ext4.Msg.alert('Error', 'Track lacks species: ' + r.trackName); + return false; } - else if (!r['isprimarytrack']) { - console.error('Track lacks VCF ID: ' + r.trackName); + + outputFiles.push(r.vcfId); + + distinctGenomesBySpecies[r.species] = distinctGenomesBySpecies[r.species] || []; + if (r['vcfId/library_id']) { + distinctGenomesBySpecies[r.species].push(r['vcfId/library_id']); } }, this); @@ -36,9 +42,12 @@ Ext4.define('mGAP.window.ReleaseWindow', { return; } - distinctGenomes = Ext4.Array.unique(distinctGenomes); - if (distinctGenomes.length !== 1){ - Ext4.Msg.alert('Error', 'All files must use the same genome. Genomes found: ' + distinctGenomes.length); + for (sn in Ext4.Object.getKeys(distinctGenomesBySpecies)) { + var genomes = Ext4.Array.unique(distinctGenomesBySpecies[sn]); + if (genomes.length !== 1){ + Ext4.Msg.alert('Error', 'All files must use the same genome. Genomes found for species ' + sn + ': ' + genomes.length); + return; + } } LABKEY.Ajax.request({ @@ -68,7 +77,7 @@ Ext4.define('mGAP.window.ReleaseWindow', { title: results.name, handlerConfig: results, toolParameters: results.toolParameters, - libraryId: distinctGenomes.length == 1 ? distinctGenomes[0] : null + libraryId: distinctGenomes.length === 1 ? distinctGenomes[0] : null }).show(); } } diff --git a/mGAP/src/org/labkey/mgap/mGAPController.java b/mGAP/src/org/labkey/mgap/mGAPController.java index e8f0ebbc4..52991080b 100644 --- a/mGAP/src/org/labkey/mgap/mGAPController.java +++ b/mGAP/src/org/labkey/mgap/mGAPController.java @@ -58,6 +58,7 @@ import org.labkey.api.query.QueryUpdateService; import org.labkey.api.query.UserSchema; import org.labkey.api.reader.Readers; +import org.labkey.api.resource.Resource; import org.labkey.api.security.AuthenticationManager; import org.labkey.api.security.Group; import org.labkey.api.security.GroupManager; @@ -224,7 +225,7 @@ public Object execute(RequestUserForm form, BindException errors) throws Excepti } DetailsURL url = DetailsURL.fromString("/query/executeQuery.view?schemaName=mgap&query.queryName=userRequests&query.viewName=Pending Requests", c); - mail.setEncodedHtmlContent("A user requested an account on mGap. Click here to view/approve this request"); + mail.setEncodedHtmlContent("A user requested an account on mGap. Click here to view/approve this request"); mail.setFrom(getReplyEmail(getContainer())); mail.setSubject("mGap Account Request"); mail.addRecipients(Message.RecipientType.TO, emails.toArray(new Address[emails.size()])); @@ -412,13 +413,13 @@ public Object execute(ApproveUserRequestsForm form, BindException errors) throws User u; if (map.get("userId") != null) { - Integer userId = (Integer)map.get("userId"); + Integer userId = (Integer) map.get("userId"); u = UserManager.getUser(userId); existingUsersGivenAccess.add(u); } else { - ValidEmail ve = new ValidEmail((String)map.get("email")); + ValidEmail ve = new ValidEmail((String) map.get("email")); u = UserManager.getUser(ve); if (u != null) { @@ -428,8 +429,8 @@ public Object execute(ApproveUserRequestsForm form, BindException errors) throws { SecurityManager.NewUserStatus st = SecurityManager.addUser(ve, getUser()); u = st.getUser(); - u.setFirstName((String)map.get("firstName")); - u.setLastName((String)map.get("lastName")); + u.setFirstName((String) map.get("firstName")); + u.setLastName((String) map.get("lastName")); UserManager.updateUser(getUser(), u); if (st.isLdapOrSsoEmail()) @@ -539,7 +540,7 @@ private static Map getReleaseRow(User u, ReleaseForm form, Error return null; } - Container rowContainer = ContainerManager.getForId((String)row.get("container")); + Container rowContainer = ContainerManager.getForId((String) row.get("container")); if (rowContainer == null) { errors.reject(ERROR_MSG, "Unknown row container: " + form.getReleaseId()); @@ -555,7 +556,7 @@ else if (!rowContainer.hasPermission(u, ReadPermission.class)) private static SequenceOutputFile getOutputFile(Map row, ReleaseForm form, Errors errors) { - SequenceOutputFile so = SequenceOutputFile.getForId((Integer)row.get("vcfId")); + SequenceOutputFile so = SequenceOutputFile.getForId((Integer) row.get("vcfId")); if (so == null) { errors.reject(ERROR_MSG, "Unknown VCF file ID: " + form.getReleaseId()); @@ -590,7 +591,7 @@ public void export(DownloadBundleForm form, HttpServletResponse response, BindEx } Set toZip = new HashSet<>(); - String zipName = "mGap_VariantCatalog_v" + FileUtil.makeLegalName((String)row.get("version")); + String zipName = "mGap_VariantCatalog_v" + FileUtil.makeLegalName((String) row.get("version")); zipName = zipName.replaceAll(" ", "_"); toZip.add(so.getFile()); @@ -598,7 +599,7 @@ public void export(DownloadBundleForm form, HttpServletResponse response, BindEx if (form.getIncludeGenome()) { - ReferenceGenome genome = SequenceAnalysisService.get().getReferenceGenome((Integer)row.get("genomeId"), getUser()); + ReferenceGenome genome = SequenceAnalysisService.get().getReferenceGenome((Integer) row.get("genomeId"), getUser()); if (genome == null) { errors.reject(ERROR_MSG, "Unknown genome: " + row.get("genomeId")); @@ -969,7 +970,7 @@ public URLHelper getRedirectURL(GenomeBrowserForm form) String species = StringUtils.trimToNull(form.getSpecies()); if (jbrowseDatabaseId == null) { - jbrowseDatabaseId = ctx.getString("human".equals(species) ? "mgapJBrowseHuman": "mgapJBrowse"); + jbrowseDatabaseId = ctx.getString("human".equals(species) ? "mgapJBrowseHuman" : "mgapJBrowse"); } if (jbrowseDatabaseId == null) @@ -1283,4 +1284,71 @@ public URLHelper getSuccessURL(Object o) return PageFlowUtil.urlProvider(PipelineUrls.class).urlBegin(getContainer()); } } + + @RequiresPermission(AdminPermission.class) + public static class ImportDataAction extends ConfirmAction + { + @Override + public ModelAndView getConfirmView(Object o, BindException errors) throws Exception + { + setTitle("Import mGAP Reference Data"); + + return HtmlView.of("This will import default values for reference tables. Do you want to continue?"); + } + + @Override + public void validateCommand(Object o, Errors errors) + { + + } + + @Override + public @NotNull URLHelper getSuccessURL(Object o) + { + return getContainer().getStartURL(getUser()); + } + + @Override + public boolean handlePost(Object o, BindException errors) throws Exception + { + Resource r = ModuleLoader.getInstance().getModule(mGAPModule.class).getModuleResource(Path.parse("data/species.tsv")); + if (!r.exists()) + { + throw new IllegalStateException("Unable to find species.tsv"); + } + + List> toAdd = new ArrayList<>(); + try (CSVReader reader = new CSVReader(Readers.getReader(r.getInputStream()), '\t')) + { + String[] line; + while ((line = reader.readNext()) != null) + { + if (line[0].equals("common_name")) + { + continue; + } + + Map row = new CaseInsensitiveHashMap<>(); + row.put("common_name", line[0]); + row.put("scientific_name", line[1]); + row.put("mhc_prefix", line[2]); + + toAdd.add(row); + } + } + + UserSchema us = QueryService.get().getUserSchema(getUser(), getContainer(), "laboratory"); + TableInfo ti = us.getTable("species"); + ti.getUpdateService().truncateRows(getUser(), getContainer(), null, null); + + BatchValidationException bve = new BatchValidationException(); + ti.getUpdateService().insertRows(getUser(), getContainer(), toAdd, bve, null, null); + if (bve.hasErrors()) + { + throw bve; + } + + return true; + } + } } \ No newline at end of file diff --git a/mGAP/src/org/labkey/mgap/mGAPModule.java b/mGAP/src/org/labkey/mgap/mGAPModule.java index 7bcc1c7c4..36fccd989 100644 --- a/mGAP/src/org/labkey/mgap/mGAPModule.java +++ b/mGAP/src/org/labkey/mgap/mGAPModule.java @@ -57,7 +57,6 @@ import org.labkey.mgap.pipeline.SampleSpecificGenotypeFiltrationStep; import org.labkey.mgap.pipeline.VcfComparisonStep; import org.labkey.mgap.pipeline.mGapReleaseAlleleFreqStep; -import org.labkey.mgap.pipeline.mGapReleaseAnnotateNovelSitesStep; import org.labkey.mgap.pipeline.mGapReleaseComparisonStep; import org.labkey.mgap.pipeline.mGapReleaseGenerator; import org.labkey.mgap.query.mGAPUserSchema; @@ -77,7 +76,7 @@ public String getName() @Override public Double getSchemaVersion() { - return 16.73; + return 16.75; } @Override @@ -141,7 +140,6 @@ public PipelineStartup() SequencePipelineService.get().registerPipelineStep(new VcfComparisonStep.Provider()); SequencePipelineService.get().registerPipelineStep(new mGapReleaseComparisonStep.Provider()); SequencePipelineService.get().registerPipelineStep(new SampleSpecificGenotypeFiltrationStep.Provider()); - SequencePipelineService.get().registerPipelineStep(new mGapReleaseAnnotateNovelSitesStep.Provider()); SequencePipelineService.get().registerPipelineStep(new GenerateMgapTracksStep.Provider()); SequencePipelineService.get().registerPipelineStep(new IndexVariantsForMgapStep.Provider()); SequencePipelineService.get().registerPipelineStep(new mGapReleaseAlleleFreqStep.Provider()); diff --git a/mGAP/src/org/labkey/mgap/pipeline/AnnotateNovelSitesWrapper.java b/mGAP/src/org/labkey/mgap/pipeline/AnnotateNovelSitesWrapper.java new file mode 100644 index 000000000..3fef55a99 --- /dev/null +++ b/mGAP/src/org/labkey/mgap/pipeline/AnnotateNovelSitesWrapper.java @@ -0,0 +1,58 @@ +package org.labkey.mgap.pipeline; + +import org.apache.logging.log4j.Logger; +import org.jetbrains.annotations.Nullable; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.sequenceanalysis.run.AbstractDiscvrSeqWrapper; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +public class AnnotateNovelSitesWrapper extends AbstractDiscvrSeqWrapper +{ + public AnnotateNovelSitesWrapper(Logger log) + { + super(log); + } + + public File execute(File vcf, @Nullable File referenceVcf, File fasta, String versionString, File vcfOutput, List extraArgs) throws PipelineJobException + { + List args = new ArrayList<>(getBaseArgs()); + args.add("AnnotateNovelSites"); + args.add("-R"); + args.add(fasta.getPath()); + + args.add("-V"); + args.add(vcf.getPath()); + + if (referenceVcf != null) + { + args.add("-rv"); + args.add(referenceVcf.getPath()); + } + else + { + args.add("--allow-missing-ref"); + } + + args.add("-an"); + args.add("mGAPV"); + args.add("-ad"); + args.add("The first mGAP version where variants at this site appeared"); + args.add("-av"); + args.add(versionString); + + args.add("-O"); + args.add(vcfOutput.getPath()); + + if (extraArgs != null) + { + args.addAll(extraArgs); + } + + execute(args); + + return vcfOutput; + } +} diff --git a/mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java b/mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java index 96dd56b56..2854ce3c5 100644 --- a/mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java +++ b/mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java @@ -2,7 +2,6 @@ import htsjdk.samtools.util.Interval; import htsjdk.variant.vcf.VCFFileReader; -import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.jetbrains.annotations.Nullable; import org.json.JSONObject; @@ -25,18 +24,14 @@ import org.labkey.api.sequenceanalysis.SequenceOutputFile; import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider; import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; -import org.labkey.api.sequenceanalysis.pipeline.PipelineStep; import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider; import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; -import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep; import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl; import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; import org.labkey.api.sequenceanalysis.run.SelectVariantsWrapper; -import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; -import org.labkey.api.util.FileUtil; import org.labkey.api.util.PageFlowUtil; import org.labkey.api.writer.PrintWriters; import org.labkey.mgap.mGAPSchema; @@ -57,7 +52,7 @@ /** * Created by bimber on 5/2/2017. */ -public class AnnotationStep extends AbstractCommandPipelineStep implements VariantProcessingStep +public class AnnotationStep extends AbstractCommandPipelineStep implements VariantProcessingStep { public static final String GRCH37 = "genome37"; private static final String CLINVAR_VCF = "clinvar37"; @@ -65,7 +60,7 @@ public class AnnotationStep extends AbstractCommandPipelineStep public AnnotationStep(PipelineStepProvider provider, PipelineContext ctx) { - super(provider, ctx, new CassandraRunner(ctx.getLogger())); + super(provider, ctx, new MultiSourceAnnotatorRunner(ctx.getLogger())); } public static class Provider extends AbstractVariantProcessingStepProvider implements VariantProcessingStep.SupportsScatterGather @@ -88,10 +83,6 @@ public Provider() put("valueField", "rowid"); put("allowBlank", false); }}, null), - ToolParameterDescriptor.create("useCassandra", "Use Cassandra", "If checked, Cassandra will be run.", "checkbox", new JSONObject() - {{ - put("checked", true); - }}, true), ToolParameterDescriptor.create("useFuncotator", "Use Funcotator", "If checked, Extended Funcotator will be run.", "checkbox", new JSONObject() {{ put("checked", true); @@ -242,44 +233,20 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno boolean dropGenotypes = totalSubjects > 10; boolean dropFiltered = getProvider().getParameterByName("dropFiltered").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class); - //This flag exists to allow in-flight jobs to be reworked to include a sample. it should eventually be removed. - boolean forceRecreate = false; - File currentVcf = inputVCF; if (dropGenotypes || dropFiltered) { if (dropGenotypes) - getPipelineCtx().getLogger().info("dropping most genotypes prior to liftover for performance reasons. a single is retained since cassandra requires one."); + getPipelineCtx().getLogger().info("dropping genotypes prior to liftover for performance reasons."); if (dropFiltered) getPipelineCtx().getLogger().info("dropping filtered sites"); File subset = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName()) + ".subset.vcf.gz"); - //NOTE: this check exists to correct in-flight jobs created using --sites-only-vcf-output. It should eventually be removed. - if (subset.exists()) - { - try (VCFFileReader reader = new VCFFileReader(subset)) - { - if (reader.getFileHeader().getGenotypeSamples().isEmpty()) - { - getPipelineCtx().getLogger().info("A VCF appears to have been created with --sites-only. Will overwrite these using an output with a single sample for Cassandra"); - forceRecreate = true; - } - } - } - List selectArgs = new ArrayList<>(); if (dropGenotypes) { - //NOTE: Cassandra requires at least one genotype, so instead of --sites-only-vcf-output, subset to first sample only - String firstSample; - try (VCFFileReader reader = new VCFFileReader(inputVCF)) - { - firstSample = reader.getFileHeader().getGenotypeSamples().get(0); - } - - selectArgs.add("-sn"); - selectArgs.add(firstSample); + selectArgs.add("--sites-only-vcf-output"); } if (dropFiltered) @@ -297,7 +264,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno needToSubsetToInterval = false; } - if (forceRecreate || !indexExists(subset)) + if (!indexExists(subset)) { SelectVariantsWrapper wrapper = new SelectVariantsWrapper(getPipelineCtx().getLogger()); wrapper.execute(originalGenome.getWorkingFastaFile(), inputVCF, subset, selectArgs); @@ -332,7 +299,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno needToSubsetToInterval = false; File intervalSubset = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName()) + ".intervalSubset.vcf.gz"); - if (forceRecreate || !indexExists(intervalSubset)) + if (!indexExists(intervalSubset)) { SelectVariantsWrapper wrapper = new SelectVariantsWrapper(getPipelineCtx().getLogger()); wrapper.execute(originalGenome.getWorkingFastaFile(), inputVCF, intervalSubset, selectArgs); @@ -358,7 +325,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno File liftedToGRCh37 = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(currentVcf.getName()) + ".liftTo" + grch37Genome.getGenomeId() + ".vcf.gz"); File liftoverRejects = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(currentVcf.getName()) + ".liftoverReject" + grch37Genome.getGenomeId() + ".vcf.gz"); - if (forceRecreate || !indexExists(liftoverRejects) || !indexExists(liftedToGRCh37)) + if (!indexExists(liftoverRejects) || !indexExists(liftedToGRCh37)) { LiftoverVcfRunner liftoverVcfRunner = new LiftoverVcfRunner(getPipelineCtx().getLogger()); liftoverVcfRunner.doLiftover(currentVcf, chainFile, grch37Genome.getWorkingFastaFile(), liftoverRejects, liftedToGRCh37, 0.95); @@ -374,7 +341,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno //annotate with clinvar getPipelineCtx().getLogger().info("annotating with ClinVar 2.0"); File clinvarAnnotated = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(liftedToGRCh37.getName()) + ".cv.vcf.gz"); - if (forceRecreate || !indexExists(clinvarAnnotated)) + if (!indexExists(clinvarAnnotated)) { ClinvarAnnotatorRunner cvRunner = new ClinvarAnnotatorRunner(getPipelineCtx().getLogger()); cvRunner.execute(liftedToGRCh37, clinvarVCF, clinvarAnnotated); @@ -390,7 +357,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno //backport ClinVar getPipelineCtx().getLogger().info("backport ClinVar 2.0 to source genome"); File clinvarAnnotatedBackport = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(clinvarAnnotated.getName()) + ".bp.vcf.gz"); - if (forceRecreate || !indexExists(clinvarAnnotatedBackport )) + if (!indexExists(clinvarAnnotatedBackport )) { BackportLiftedVcfRunner bpRunner = new BackportLiftedVcfRunner(getPipelineCtx().getLogger()); bpRunner.execute(clinvarAnnotated, originalGenome.getWorkingFastaFile(), grch37Genome.getWorkingFastaFile(), clinvarAnnotatedBackport); @@ -403,49 +370,6 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno output.addIntermediateFile(clinvarAnnotatedBackport); output.addIntermediateFile(new File(clinvarAnnotatedBackport.getPath() + ".tbi")); - //annotate with cassandra - File cassandraAnnotatedBackport = null; - boolean useCassandra = getProvider().getParameterByName("useCassandra").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false); - if (useCassandra) - { - getPipelineCtx().getLogger().info("annotating with Cassandra"); - String basename = SequenceAnalysisService.get().getUnzippedBaseName(liftedToGRCh37.getName()) + ".cassandra"; - File cassandraAnnotated = new File(outputDirectory, basename + ".vcf.gz"); - if (forceRecreate || !indexExists(cassandraAnnotated)) - { - //we can assume splitting happened upstream, so run over the full VCF - runCassandra(liftedToGRCh37, cassandraAnnotated, output, forceRecreate); - } - else - { - getPipelineCtx().getLogger().info("resuming with existing file: " + cassandraAnnotated.getPath()); - } - - output.addOutput(cassandraAnnotated, "VCF Annotated With Cassandra"); - output.addIntermediateFile(cassandraAnnotated); - output.addIntermediateFile(new File(cassandraAnnotated.getPath() + ".tbi")); - - //backport Cassandra - getPipelineCtx().getLogger().info("backport Cassandra to source genome"); - cassandraAnnotatedBackport = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(cassandraAnnotated.getName()) + ".bp.vcf.gz"); - if (forceRecreate || !indexExists(cassandraAnnotatedBackport)) - { - BackportLiftedVcfRunner bpRunner = new BackportLiftedVcfRunner(getPipelineCtx().getLogger()); - bpRunner.execute(cassandraAnnotated, originalGenome.getWorkingFastaFile(), grch37Genome.getWorkingFastaFile(), cassandraAnnotatedBackport); - } - else - { - getPipelineCtx().getLogger().info("resuming with existing file: " + cassandraAnnotatedBackport.getPath()); - } - output.addOutput(cassandraAnnotatedBackport, "VCF Annotated With Cassandra, Backported"); - output.addIntermediateFile(cassandraAnnotatedBackport); - output.addIntermediateFile(new File(cassandraAnnotatedBackport.getPath() + ".tbi")); - } - else - { - getPipelineCtx().getLogger().debug("Cassandra will be skipped"); - } - //annotate with funcotator File funcotatorAnnotatedBackport = null; if (useFuncotator) @@ -453,7 +377,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno getPipelineCtx().getLogger().info("annotating with Funcotator"); String basename = SequenceAnalysisService.get().getUnzippedBaseName(liftedToGRCh37.getName()) + ".funcotator"; File funcotatorAnnotated = new File(outputDirectory, basename + ".vcf.gz"); - if (forceRecreate || !indexExists(funcotatorAnnotated)) + if (!indexExists(funcotatorAnnotated)) { //we can assume splitting happened upstream, so run over the full VCF FuncotatorWrapper fr = new FuncotatorWrapper(getPipelineCtx().getLogger()); @@ -490,7 +414,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno //backport Funcotator getPipelineCtx().getLogger().info("backport Funcotator to source genome"); funcotatorAnnotatedBackport = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(funcotatorAnnotated.getName()) + ".bp.vcf.gz"); - if (forceRecreate || !indexExists(funcotatorAnnotatedBackport)) + if (!indexExists(funcotatorAnnotatedBackport)) { BackportLiftedVcfRunner bpRunner = new BackportLiftedVcfRunner(getPipelineCtx().getLogger()); bpRunner.execute(funcotatorAnnotated, originalGenome.getWorkingFastaFile(), grch37Genome.getWorkingFastaFile(), funcotatorAnnotatedBackport); @@ -511,7 +435,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno //multiannotator getPipelineCtx().getLogger().info("Running MultiSourceAnnotator"); File multiAnnotated = new File(getPipelineCtx().getWorkingDirectory(), SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName()) + ".ma.vcf.gz"); - if (forceRecreate || !indexExists(multiAnnotated)) + if (!indexExists(multiAnnotated)) { MultiSourceAnnotatorRunner maRunner = new MultiSourceAnnotatorRunner(getPipelineCtx().getLogger()); @@ -535,7 +459,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno addToolFieldNames("Funcotator", "-ff", options, multiAnnotated.getParentFile(), output, liftFields); } - maRunner.execute(inputVCF, cassandraAnnotatedBackport, clinvarAnnotatedBackport, liftoverRejects, funcotatorAnnotatedBackport, multiAnnotated, options); + maRunner.execute(inputVCF, clinvarAnnotatedBackport, liftoverRejects, funcotatorAnnotatedBackport, multiAnnotated, options); } else { @@ -588,75 +512,6 @@ private void addToolFieldNames(String toolName, String argName, List opt options.add(fieldFile.getPath()); } - private void runCassandra(File liftedToGRCh37, File finalOutput, VariantProcessingStepOutputImpl output, boolean forceRecreate) throws PipelineJobException - { - List extraArgs = new ArrayList<>(); - - //NOTE: Cassandra will not sort the output when multithreaded, so the extra sorting we would need to do negates any benefit here - String tmpDir = SequencePipelineService.get().getJavaTempDir(); - if (!StringUtils.isEmpty(tmpDir)) - { - File tmpDirFile = new File(tmpDir, "cassandra"); - if (!tmpDirFile.exists()) - { - tmpDirFile.mkdirs(); - } - - extraArgs.add("--tempDir"); - extraArgs.add(tmpDirFile.getPath()); - } - - CassandraRunner cassRunner = new CassandraRunner(getPipelineCtx().getLogger()); - - Integer maxRam = SequencePipelineService.get().getMaxRam(); - cassRunner.setMaxRamOverride(maxRam); - - //Cassandra requires unzipped files - File liftedToGRCh37Unzipped = new File(liftedToGRCh37.getParentFile(), FileUtil.getBaseName(liftedToGRCh37.getName())); - File liftedToGRCh37UnzippedDone = new File(liftedToGRCh37Unzipped.getPath() + ".done"); - if (forceRecreate || !liftedToGRCh37UnzippedDone.exists()) - { - SimpleScriptWrapper wrapper = new SimpleScriptWrapper(getPipelineCtx().getLogger()); - wrapper.execute(Arrays.asList("gunzip", liftedToGRCh37.getPath())); - try - { - FileUtils.touch(liftedToGRCh37UnzippedDone); - if (!liftedToGRCh37.exists() && indexExists(liftedToGRCh37)) - { - File idx = new File(liftedToGRCh37.getPath() + ".tbi"); - idx.delete(); - } - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - } - else - { - getPipelineCtx().getLogger().info("Resuming from file: " + liftedToGRCh37Unzipped.getPath()); - } - - output.addIntermediateFile(liftedToGRCh37Unzipped); - output.addIntermediateFile(new File(liftedToGRCh37Unzipped.getPath() + ".idx")); - output.addIntermediateFile(liftedToGRCh37UnzippedDone); - - cassRunner.execute(liftedToGRCh37Unzipped, finalOutput, extraArgs); - if (!finalOutput.exists()) - { - throw new PipelineJobException("Unable to find output"); - } - - try - { - SequenceAnalysisService.get().ensureVcfIndex(finalOutput, getPipelineCtx().getLogger()); - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - } - protected static boolean indexExists(File vcf) { File idx = new File(vcf.getPath() + ".tbi"); diff --git a/mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java b/mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java index df573c6ec..484b9c00e 100644 --- a/mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java +++ b/mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java @@ -6,6 +6,7 @@ import htsjdk.variant.vcf.VCFFileReader; import htsjdk.variant.vcf.VCFHeader; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.math.NumberUtils; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.Nullable; import org.json.JSONObject; @@ -29,7 +30,6 @@ import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStep; import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider; import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; -import org.labkey.api.sequenceanalysis.pipeline.PipelineStep; import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider; import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; @@ -62,6 +62,9 @@ public class GenerateMgapTracksStep extends AbstractPipelineStep implements VariantProcessingStep, VariantProcessingStep.SupportsScatterGather { public static final String TRACK_CATEGORY = "mGAP Release Track"; + public static final String VERSION_ROWID = "versionRowId"; + public static final String PRIOR_RELEASE_LABEL = "priorReleaseLabel"; + public static final String SITES_ONLY_DATA = "sitesOnlyVcfData"; // 1) makes the subset VCF per track with those IDs, // 2) dies if it cannot find any of the IDs being requested, @@ -78,11 +81,31 @@ public static class Provider extends AbstractVariantProcessingStepProvider primaryTrackNames = new HashSet<>(); + Map> trackToSubject = new HashMap<>(); + new TableSelector(existingTracks, PageFlowUtil.set("trackName", "isprimarytrack"), new SimpleFilter(FieldKey.fromString("species"), species), null).forEachResults(rs -> { + if (trackToSubject.containsKey(rs.getString(FieldKey.fromString("trackName")))) + { + throw new IllegalStateException("Duplicate track names present: " + rs.getString(FieldKey.fromString("trackName"))); + } + + trackToSubject.put(rs.getString(FieldKey.fromString("trackName")), new HashSet<>()); + + if (rs.getObject(FieldKey.fromString("isprimarytrack")) != null & rs.getBoolean(FieldKey.fromString("isprimarytrack"))) + { + primaryTrackNames.add(rs.getString(FieldKey.fromString("trackName"))); + } + }); + + if (primaryTrackNames.size() != 1) + { + throw new IllegalStateException("Expected single primary track, found: " + primaryTrackNames.size()); + } + // Verify all IDs in header are mGAP aliases. This map is the true ID to mGAP alias Map sampleIdToMgapAlias = getSampleToAlias(so.getFile()); // Now read track list, validate IDs present, and write to file: TableInfo ti = QueryService.get().getUserSchema(getPipelineCtx().getJob().getUser(), (getPipelineCtx().getJob().getContainer().isWorkbook() ? getPipelineCtx().getJob().getContainer().getParent() : getPipelineCtx().getJob().getContainer()), mGAPSchema.NAME).getTable(mGAPSchema.TABLE_RELEASE_TRACK_SUBSETS); - TableSelector ts = new TableSelector(ti, PageFlowUtil.set("trackName", "subjectId")); + TableSelector ts = new TableSelector(ti, PageFlowUtil.set("trackName", "subjectId"), new SimpleFilter(FieldKey.fromString("trackName"), trackToSubject.keySet(), CompareType.IN), null); Set requestedNotInVcf = new HashSet<>(); - Map> trackToSubject = new HashMap<>(); + ts.forEachResults(rs -> { if (!trackToSubject.containsKey(rs.getString(FieldKey.fromString("trackName")))) { @@ -138,6 +186,11 @@ public void init(PipelineJob job, SequenceAnalysisJobSupport support, List { writer.writeNext(new String[]{trackName, x}); }); @@ -147,67 +200,164 @@ public void init(PipelineJob job, SequenceAnalysisJobSupport support, List intervals) throws PipelineJobException + private File annotateNovelSites(File inputVCF, File outputDirectory, ReferenceGenome genome, @Nullable List intervals) throws PipelineJobException { - VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl(); - Map> trackToSamples = parseSampleMap(getSampleNameFile(getPipelineCtx().getSourceDirectory(true))); + String releaseVersion = getProvider().getParameterByName("releaseVersion").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class, "0.0"); + if (releaseVersion.toLowerCase().startsWith("v")) + { + releaseVersion = releaseVersion.substring(1); + } - VCFHeader header; - try (VCFFileReader reader = new VCFFileReader(inputVCF)) + if (!NumberUtils.isCreatable(releaseVersion)) { - header = reader.getFileHeader(); + throw new IllegalArgumentException("Expected the release version to be numeric: " + releaseVersion); } - if (!header.hasInfoLine("mGAPV")) + String priorReleaseLabel = getPipelineCtx().getSequenceSupport().getCachedObject(PRIOR_RELEASE_LABEL, String.class); + File sitesOnlyVcf = getAnnotationReferenceVcf(); + + List extraArgs = new ArrayList<>(); + if (intervals != null) { - throw new IllegalStateException("VCF is missing the annotation: mGAPV"); + intervals.forEach(interval -> { + extraArgs.add("-L"); + extraArgs.add(interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd()); + }); + + extraArgs.add("--ignore-variants-starting-outside-interval"); } - processTracks(output, inputVCF, trackToSamples, outputDirectory, genome, intervals); + if (priorReleaseLabel != null) + { + extraArgs.add("-dv"); + extraArgs.add(priorReleaseLabel); + } - // Also create the Novel Sites track: - String releaseVersion = getProvider().getParameterByName("releaseVersion").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class); - File novelSitesOutput = getNovelSitesOutput(outputDirectory); - if (new File(novelSitesOutput.getPath() + ".tbi").exists()) + if (sitesOnlyVcf != null) { - getPipelineCtx().getLogger().debug("Index exists, will not remake novel sites VCF"); + extraArgs.add("-ns"); + extraArgs.add(getNovelSitesOutput(outputDirectory).getPath()); + } + + File annotatedVCF = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName()) + ".comparison.vcf.gz"); + if (new File(annotatedVCF.getPath() + ".tbi").exists()) + { + getPipelineCtx().getLogger().debug("Index exists, will not remake annotated sites VCF"); } else { - getPipelineCtx().getJob().setStatus(PipelineJob.TaskStatus.running, "Processing novel sites track"); - - SelectVariantsWrapper sv = new SelectVariantsWrapper(getPipelineCtx().getLogger()); - List svArgs = new ArrayList<>(); - svArgs.add("-select"); - svArgs.add("mGAPV == '" + releaseVersion + "'"); - if (intervals != null) + new AnnotateNovelSitesWrapper(getPipelineCtx().getLogger()).execute(inputVCF, sitesOnlyVcf, genome.getWorkingFastaFile(), releaseVersion, annotatedVCF, extraArgs); + if (!annotatedVCF.exists()) { - intervals.forEach(interval -> { - svArgs.add("-L"); - svArgs.add(interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd()); - }); + throw new PipelineJobException("Unable to find output: " + annotatedVCF.getPath()); } + } + + return annotatedVCF; + } + + private File getNovelSitesOutput(File outputDirectory) + { + String releaseVersion = getProvider().getParameterByName("releaseVersion").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class); + String species = getProvider().getParameterByName("species").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class); + + return new File(outputDirectory, "mGAP_v" + releaseVersion + "_" + species.replaceAll(" ", "_") + "_NovelSites.vcf.gz"); + } + + @Override + public Output processVariants(File inputVCF, File outputDirectory, ReferenceGenome genome, @Nullable List intervals) throws PipelineJobException + { + VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl(); + Map> trackToSamples = parseSampleMap(getSampleNameFile(getPipelineCtx().getSourceDirectory(true))); + + String primaryTrackName = getPipelineCtx().getSequenceSupport().getCachedObject("primaryTrackName", String.class); + Map tracks = processTracks(output, inputVCF, trackToSamples, outputDirectory, genome, intervals); - sv.execute(genome.getWorkingFastaFile(), inputVCF, novelSitesOutput, svArgs); + File primaryTrackFile = tracks.get(primaryTrackName); + if (primaryTrackFile == null) + { + throw new PipelineJobException("Missing primary track"); } - getPipelineCtx().getJob().getLogger().info("total variants: " + SequenceAnalysisService.get().getVCFLineCount(novelSitesOutput, getPipelineCtx().getJob().getLogger(), false)); + File primaryTrackAnnotated = annotateNovelSites(primaryTrackFile, outputDirectory, genome, intervals); + output.addIntermediateFile(primaryTrackAnnotated); + + if (getAnnotationReferenceVcf() != null) + { + File novelSitesOutput = getNovelSitesOutput(outputDirectory); + if (!novelSitesOutput.exists()) + { + throw new PipelineJobException("Missing file: " + novelSitesOutput.getPath()); + } + + getPipelineCtx().getJob().getLogger().info("total novel variants in release: " + SequenceAnalysisService.get().getVCFLineCount(novelSitesOutput, getPipelineCtx().getJob().getLogger(), false)); + } return output; } private File getOutputVcf(String trackName, File outputDirectory) { - return new File(outputDirectory, FileUtil.makeLegalName(trackName) + ".vcf.gz"); + return new File(outputDirectory, FileUtil.makeLegalName(trackName).replaceAll(" ", "_") + ".vcf.gz"); } @Override @@ -221,37 +371,33 @@ public void complete(PipelineJob job, List inputs, List newRow = new CaseInsensitiveHashMap<>(); newRow.put("trackName", trackName); newRow.put("label", trackName); + newRow.put("species", species); newRow.put("vcfId", so.getRowid()); - newRow.put("isprimarytrack", isPrimaryTrack); + newRow.put("isprimarytrack", primaryTrackName.equals(trackName)); BatchValidationException bve = new BatchValidationException(); releaseTracks.getUpdateService().insertRows(job.getUser(), targetContainer, Arrays.asList(newRow), bve, null, null); @@ -281,11 +427,6 @@ private void createOrUpdateTrack(SequenceOutputFile so, PipelineJob job, String } } - private boolean indexExists(File vcf) - { - return new File(vcf.getPath() + ".tbi").exists(); - } - private File getSampleNameFile(File outputDir) { return new File(outputDir, "sampleMapping.txt"); @@ -443,6 +584,7 @@ public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, Pi return f; }).toList(); + job.getLogger().debug("Total VCFs to merge: " + toConcat.size()); if (toConcat.isEmpty()) { @@ -470,45 +612,49 @@ public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, Pi manager.addSequenceOutput(so); } - job.getLogger().info("Merging novel sites VCF"); - List toConcat = orderedJobDirs.stream().map(dirName -> { - File f = getNovelSitesOutput(new File(ctx.getSourceDirectory(), dirName)); - if (!f.exists()) - { - throw new IllegalStateException("Missing file: " + f.getPath()); - } + if (getAnnotationReferenceVcf() != null) + { + job.getLogger().info("Merging novel sites VCF"); + List toConcat = orderedJobDirs.stream().map(dirName -> { + File f = getNovelSitesOutput(new File(ctx.getSourceDirectory(), dirName)); + if (!f.exists()) + { + throw new IllegalStateException("Missing file: " + f.getPath()); + } - ctx.getFileManager().addIntermediateFile(f); - ctx.getFileManager().addIntermediateFile(new File(f.getPath() + ".tbi")); + ctx.getFileManager().addIntermediateFile(f); + ctx.getFileManager().addIntermediateFile(new File(f.getPath() + ".tbi")); - return f; - }).toList(); + return f; + }).toList(); - if (toConcat.isEmpty()) - { - throw new PipelineJobException("No novel sites VCFs found"); - } + if (toConcat.isEmpty()) + { + throw new PipelineJobException("No novel sites VCFs found"); + } - String basename = SequenceAnalysisService.get().getUnzippedBaseName(toConcat.get(0).getName()); - File combined = new File(ctx.getSourceDirectory(), basename + ".vcf.gz"); - File combinedIdx = new File(combined.getPath() + ".tbi"); - if (combinedIdx.exists()) - { - job.getLogger().info("VCF exists, will not recreate: " + combined.getPath()); - } - else - { - combined = SequenceAnalysisService.get().combineVcfs(toConcat, combined, genome, job.getLogger(), true, null); - } + String basename = SequenceAnalysisService.get().getUnzippedBaseName(toConcat.get(0).getName()); + File combined = new File(ctx.getSourceDirectory(), basename + ".vcf.gz"); + File combinedIdx = new File(combined.getPath() + ".tbi"); + if (combinedIdx.exists()) + { + job.getLogger().info("VCF exists, will not recreate: " + combined.getPath()); + } + else + { + combined = SequenceAnalysisService.get().combineVcfs(toConcat, combined, genome, job.getLogger(), true, null); + } - SequenceOutputFile so = new SequenceOutputFile(); - so.setName("Novel Sites in This Release"); - so.setFile(combined); - so.setCategory(TRACK_CATEGORY); - so.setLibrary_id(genome.getGenomeId()); - String releaseVersion = getProvider().getParameterByName("releaseVersion").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class); - so.setDescription("These are novel sites in mGAP v" + releaseVersion); - manager.addSequenceOutput(so); + String releaseVersion = getProvider().getParameterByName("releaseVersion").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class); + String species = getProvider().getParameterByName("species").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class); + SequenceOutputFile so = new SequenceOutputFile(); + so.setName(species + ": Novel Sites in Release " + releaseVersion); + so.setFile(combined); + so.setCategory(TRACK_CATEGORY); + so.setLibrary_id(genome.getGenomeId()); + so.setDescription("These are novel sites in mGAP v" + releaseVersion + " for " + species); + manager.addSequenceOutput(so); + } } public static class SplitVcfBySamplesWrapper extends AbstractDiscvrSeqWrapper diff --git a/mGAP/src/org/labkey/mgap/pipeline/MultiSourceAnnotatorRunner.java b/mGAP/src/org/labkey/mgap/pipeline/MultiSourceAnnotatorRunner.java index 23d0b388a..6a1c50939 100644 --- a/mGAP/src/org/labkey/mgap/pipeline/MultiSourceAnnotatorRunner.java +++ b/mGAP/src/org/labkey/mgap/pipeline/MultiSourceAnnotatorRunner.java @@ -15,19 +15,13 @@ public MultiSourceAnnotatorRunner(Logger log) super(log); } - public File execute(File inputVcf, @Nullable File cassandraVcf, File clinvarAnnotatedBackport, File liftoverRejects, @Nullable File funcotator, File outputVcf, @Nullable List options) throws PipelineJobException + public File execute(File inputVcf, File clinvarAnnotatedBackport, File liftoverRejects, @Nullable File funcotator, File outputVcf, @Nullable List options) throws PipelineJobException { List args = getBaseArgs("MultiSourceAnnotator"); args.add("-V"); args.add(inputVcf.getPath()); - if (cassandraVcf != null) - { - args.add("-c"); - args.add(cassandraVcf.getPath()); - } - args.add("-lr"); args.add(liftoverRejects.getPath()); diff --git a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseAnnotateNovelSitesStep.java b/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseAnnotateNovelSitesStep.java deleted file mode 100644 index 70e30aaf1..000000000 --- a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseAnnotateNovelSitesStep.java +++ /dev/null @@ -1,206 +0,0 @@ -package org.labkey.mgap.pipeline; - -import htsjdk.samtools.util.Interval; -import org.apache.commons.lang3.math.NumberUtils; -import org.apache.logging.log4j.Logger; -import org.jetbrains.annotations.Nullable; -import org.json.JSONObject; -import org.labkey.api.data.SimpleFilter; -import org.labkey.api.data.TableSelector; -import org.labkey.api.pipeline.PipelineJob; -import org.labkey.api.pipeline.PipelineJobException; -import org.labkey.api.query.FieldKey; -import org.labkey.api.sequenceanalysis.SequenceAnalysisService; -import org.labkey.api.sequenceanalysis.SequenceOutputFile; -import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider; -import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; -import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider; -import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; -import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; -import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; -import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep; -import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl; -import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; -import org.labkey.api.sequenceanalysis.run.AbstractDiscvrSeqWrapper; -import org.labkey.api.util.PageFlowUtil; -import org.labkey.mgap.mGAPSchema; - -import java.io.File; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -/** - * User: bimber - * Date: 6/15/2014 - * Time: 12:39 PM - */ -public class mGapReleaseAnnotateNovelSitesStep extends AbstractCommandPipelineStep implements VariantProcessingStep -{ - public static final String VERSION_ROWID = "versionRowId"; - public static final String PRIOR_RELEASE_LABEL = "priorReleaseLabel"; - public static final String SITES_ONLY_DATA = "sitesOnlyVcfData"; - - public mGapReleaseAnnotateNovelSitesStep(PipelineStepProvider provider, PipelineContext ctx) - { - super(provider, ctx, new AnnotateNovelSitesWrapper(ctx.getLogger())); - } - - public static class Provider extends AbstractVariantProcessingStepProvider implements SupportsScatterGather - { - public Provider() - { - super("mGapAnnotateNovelSites", "Annotate Novel Sites Against mGAP Release", "AnnotateNovelSites", "Compare the VCF to the specified mGAP release VCF, producing TSV/VCF reports with site- and genotype-level concordance.", Arrays.asList( - ToolParameterDescriptor.create(VERSION_ROWID, "mGAP Release", "The mGAP release VCF to use for comparison", "ldk-simplelabkeycombo", new JSONObject(){{ - put("allowBlank", false); - put("width", 400); - put("schemaName", "mgap"); - put("queryName", "variantCatalogReleases"); - put("containerPath", "js:Laboratory.Utils.getQueryContainerPath()"); - put("displayField", "version"); - put("valueField", "rowid"); - put("doNotIncludeInTemplates", true); - }}, null), - ToolParameterDescriptor.create("releaseVersion", "mGAP Version", "This string will be used to tag novel variants.", "textfield", new JSONObject(){{ - put("allowBlank", false); - put("doNotIncludeInTemplates", true); - }}, null) - ), PageFlowUtil.set("sequenceanalysis/field/SequenceOutputFileSelectorField.js"), null); - } - - @Override - public mGapReleaseAnnotateNovelSitesStep create(PipelineContext ctx) - { - return new mGapReleaseAnnotateNovelSitesStep(this, ctx); - } - } - - @Override - public Output processVariants(File inputVCF, File outputDirectory, ReferenceGenome genome, @Nullable List intervals) throws PipelineJobException - { - VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl(); - getPipelineCtx().getLogger().info("Annotating VCF by mGAP Release"); - - String releaseVersion = getProvider().getParameterByName("releaseVersion").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class, "0.0"); - if (releaseVersion.toLowerCase().startsWith("v")) - { - releaseVersion = releaseVersion.substring(1); - } - - if (!NumberUtils.isCreatable(releaseVersion)) - { - throw new IllegalArgumentException("Expected the release version to be numeric: " + releaseVersion); - } - - String priorReleaseLabel = getPipelineCtx().getSequenceSupport().getCachedObject(PRIOR_RELEASE_LABEL, String.class); - int sitesOnlyExpDataId = getPipelineCtx().getSequenceSupport().getCachedObject(SITES_ONLY_DATA, Integer.class); - File sitesOnlyVcf = getPipelineCtx().getSequenceSupport().getCachedData(sitesOnlyExpDataId); - if (!sitesOnlyVcf.exists()) - { - throw new PipelineJobException("Unable to find file: " + sitesOnlyVcf); - } - - List extraArgs = new ArrayList<>(); - if (intervals != null) - { - intervals.forEach(interval -> { - extraArgs.add("-L"); - extraArgs.add(interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd()); - }); - - extraArgs.add("--ignore-variants-starting-outside-interval"); - } - - extraArgs.add("-dv"); - extraArgs.add(priorReleaseLabel); - - File annotatedVCF = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName()) + ".comparison.vcf.gz"); - getWrapper().execute(inputVCF, sitesOnlyVcf, genome.getWorkingFastaFile(), releaseVersion, annotatedVCF, extraArgs); - if (!annotatedVCF.exists()) - { - throw new PipelineJobException("Unable to find output: " + annotatedVCF.getPath()); - } - - output.addInput(inputVCF, "Input VCF"); - output.addInput(sitesOnlyVcf, "Reference VCF"); - - output.addOutput(annotatedVCF, "VCF Annotated by mGAP Version"); - output.setVcf(annotatedVCF); - - return output; - } - - @Override - public void init(PipelineJob job, SequenceAnalysisJobSupport support, List inputFiles) throws PipelineJobException - { - Integer versionRowId = getProvider().getParameterByName(VERSION_ROWID).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class); - String version = new TableSelector(mGAPSchema.getInstance().getSchema().getTable(mGAPSchema.TABLE_VARIANT_CATALOG_RELEASES), PageFlowUtil.set("version"), new SimpleFilter(FieldKey.fromString("rowId"), versionRowId), null).getObject(String.class); - if (version == null) - { - throw new PipelineJobException("Unable to find release for release: " + versionRowId); - } - - Integer referenceVcfOutputId = new TableSelector(mGAPSchema.getInstance().getSchema().getTable(mGAPSchema.TABLE_VARIANT_CATALOG_RELEASES), PageFlowUtil.set("sitesOnlyVcfId"), new SimpleFilter(FieldKey.fromString("rowId"), versionRowId), null).getObject(Integer.class); - if (referenceVcfOutputId == null) - { - getPipelineCtx().getLogger().debug("Sites-only VCF not found, using primary VCF"); - referenceVcfOutputId = new TableSelector(mGAPSchema.getInstance().getSchema().getTable(mGAPSchema.TABLE_VARIANT_CATALOG_RELEASES), PageFlowUtil.set("vcfId"), new SimpleFilter(FieldKey.fromString("rowId"), versionRowId), null).getObject(Integer.class); - } - - if (referenceVcfOutputId == null) - { - throw new PipelineJobException("Unable to find sites-only VCF for release: " + versionRowId); - } - - SequenceOutputFile sitesOnly = SequenceOutputFile.getForId(referenceVcfOutputId); - if (sitesOnly == null) - { - throw new PipelineJobException("Unable to find sites-only VCF output file for fileId: " + referenceVcfOutputId); - } - - support.cacheExpData(sitesOnly.getExpData()); - - support.cacheObject(SITES_ONLY_DATA, sitesOnly.getDataId()); - support.cacheObject(PRIOR_RELEASE_LABEL, version); - } - - public static class AnnotateNovelSitesWrapper extends AbstractDiscvrSeqWrapper - { - public AnnotateNovelSitesWrapper(Logger log) - { - super(log); - } - - public File execute(File vcf, File referenceVcf, File fasta, String versionString, File vcfOutput, List extraArgs) throws PipelineJobException - { - List args = new ArrayList<>(getBaseArgs()); - args.add("AnnotateNovelSites"); - args.add("-R"); - args.add(fasta.getPath()); - - args.add("-V"); - args.add(vcf.getPath()); - args.add("-rv"); - args.add(referenceVcf.getPath()); - - args.add("-an"); - args.add("mGAPV"); - args.add("-ad"); - args.add("The first mGAP version where variants at this site appeared"); - args.add("-av"); - args.add(versionString); - - args.add("-O"); - args.add(vcfOutput.getPath()); - - if (extraArgs != null) - { - args.addAll(extraArgs); - } - - execute(args); - - return vcfOutput; - } - } -} diff --git a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java b/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java index 278960bc1..6a2ac50bd 100644 --- a/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java +++ b/mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java @@ -96,11 +96,21 @@ public class mGapReleaseGenerator extends AbstractParameterizedOutputHandler { private final FileType _vcfType = new FileType(List.of(".vcf"), ".vcf", false, FileType.gzSupportLevel.SUPPORT_GZ); - public static final String MMUL_GENOME = "mmulGenome"; + public static final String BASE_GENOME = "baseGenome"; public mGapReleaseGenerator() { super(ModuleLoader.getInstance().getModule(mGAPModule.class), "Create mGAP Release", "This will prepare an input VCF for use as an mGAP public release. This will optionally include: removing excess annotations and program records, limiting to SNVs (optional) and removing genotype data (optional). If genotypes are retained, the subject names will be checked for mGAP aliases and replaced as needed.", new LinkedHashSet<>(PageFlowUtil.set("sequenceanalysis/field/GenomeFileSelectorField.js")), Arrays.asList( + ToolParameterDescriptor.create("species", "Version", "The species, which is used to filter tracks", "ldk-simplelabkeycombo", new JSONObject(){{ + put("allowBlank", false); + put("doNotIncludeInTemplates", true); + put("width", 400); + put("schemaName", "laboratory"); + put("queryName", "species"); + put("containerPath", "js:Laboratory.Utils.getQueryContainerPath()"); + put("displayField", "common_name"); + put("valueField", "common_name"); + }}, null), ToolParameterDescriptor.create("releaseVersion", "Version", "This value will be used as the version when published.", "textfield", new JSONObject(){{ put("allowBlank", false); put("doNotIncludeInTemplates", true); @@ -182,10 +192,16 @@ public void init(JobContext ctx, List inputFiles, List toSelect = new HashSet<>(); toSelect.add(FieldKey.fromString("trackName")); - toSelect.add(FieldKey.fromString("mergepriority")); + toSelect.add(FieldKey.fromString("species")); toSelect.add(FieldKey.fromString("skipvalidation")); toSelect.add(FieldKey.fromString("isprimarytrack")); toSelect.add(FieldKey.fromString("vcfId")); @@ -197,7 +213,7 @@ public void init(JobContext ctx, List inputFiles, List { + new TableSelector(releaseTracks, colMap.values(), new SimpleFilter(FieldKey.fromString("species"), species), null).forEachResults(rs -> { if (rs.getObject(FieldKey.fromString("vcfId")) == null) { throw new SQLException("No VCF found for track: " + rs.getObject(FieldKey.fromString("trackName"))); @@ -217,7 +233,7 @@ public void init(JobContext ctx, List inputFiles, List inputFiles, List inputFiles, List row = new CaseInsensitiveHashMap<>(); row.put("version", job.getParameters().get("releaseVersion")); row.put("releaseDate", new Date()); + row.put("species", species); row.put("vcfId", so.getRowid()); row.put("liftedVcfId", liftedVcf.getRowid()); row.put("sitesOnlyVcfId", sitesOnlyVcf.getRowid()); @@ -583,7 +602,7 @@ else if (so.getCategory().endsWith("Release Track")) //also tracks: UserSchema us = QueryService.get().getUserSchema(job.getUser(), job.getContainer().isWorkbook() ? job.getContainer().getParent() : job.getContainer(), mGAPSchema.NAME); - new TableSelector(us.getTable(mGAPSchema.TABLE_RELEASE_TRACKS), null, null).forEachResults(rs -> { + new TableSelector(us.getTable(mGAPSchema.TABLE_RELEASE_TRACKS), new SimpleFilter(FieldKey.fromString("species"), species), null).forEachResults(rs -> { SequenceOutputFile so3 = trackVCFMap.get(rs.getString(FieldKey.fromString("trackName"))); if (so3 == null && rs.getBoolean(FieldKey.fromString("isprimarytrack"))) { @@ -836,7 +855,7 @@ public static class TrackDescriptor { String _trackName; Integer _dataId; - Integer _mergePriority; + String _species; boolean _skipValidation; boolean _isPrimary; @@ -844,7 +863,7 @@ public TrackDescriptor(String[] vals) { _trackName = vals[0]; _dataId = Integer.parseInt(vals[1]); - _mergePriority = Integer.parseInt(vals[2]); + _species = vals[2]; _skipValidation = Boolean.parseBoolean(vals[3]); _isPrimary = Boolean.parseBoolean(vals[4]); } @@ -859,9 +878,9 @@ public Integer getDataId() return _dataId; } - public Integer getMergePriority() + public String getSpecies() { - return _mergePriority; + return _species; } public boolean isSkipValidation() @@ -886,15 +905,6 @@ private List getTracks(File webserverDir) throws PipelineJobExc ret.add(new TrackDescriptor(line)); } - ret.sort(new Comparator() - { - @Override - public int compare(TrackDescriptor o1, TrackDescriptor o2) - { - return o1.getMergePriority().compareTo(o2.getMergePriority()); - } - }); - return ret; } catch (IOException e) @@ -917,12 +927,13 @@ public void processFilesRemote(List inputFiles, JobContext c GeneToNameTranslator translator = new GeneToNameTranslator(gtf, ctx.getLogger()); ReferenceGenome grch37Genome = ctx.getSequenceSupport().getCachedGenome(ctx.getParams().getInt(AnnotationStep.GRCH37)); - int genomeId = ctx.getSequenceSupport().getCachedObject(MMUL_GENOME, Integer.class); + int genomeId = ctx.getSequenceSupport().getCachedObject(BASE_GENOME, Integer.class); ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenome(genomeId); boolean testOnly = ctx.getParams().optBoolean("testOnly", false); + String species = ctx.getParams().getString("species"); String releaseVersion = ctx.getParams().optString("releaseVersion", "0.0"); - File primaryTrackVcf = new File(ctx.getOutputDir(), "mGap.v" + FileUtil.makeLegalName(releaseVersion).replaceAll(" ", "_") + ".vcf.gz"); + File primaryTrackVcf = new File(ctx.getOutputDir(), "mGap." + species + ".v" + FileUtil.makeLegalName(releaseVersion).replaceAll(" ", "_") + ".vcf.gz"); try { @@ -994,7 +1005,7 @@ public void processFilesRemote(List inputFiles, JobContext c SequenceOutputFile output = new SequenceOutputFile(); output.setFile(primaryTrackVcf); - output.setName("mGAP Release: " + releaseVersion); + output.setName("mGAP Release: " + species + " " + releaseVersion); output.setCategory((testOnly ? "Test " : "") + "mGAP Release"); output.setLibrary_id(genome.getGenomeId()); ctx.getFileManager().addSequenceOutput(output); @@ -1002,7 +1013,7 @@ public void processFilesRemote(List inputFiles, JobContext c File interestingVariantTable = getVariantTableName(ctx, primaryTrackVcf); SequenceOutputFile output2 = new SequenceOutputFile(); output2.setFile(interestingVariantTable); - output2.setName("mGAP Release: " + releaseVersion + " Variant Table"); + output2.setName("mGAP Release: " + species + " " + releaseVersion + " Variant Table"); output2.setCategory((testOnly ? "Test " : "") + "mGAP Release Variant Table"); output2.setLibrary_id(genome.getGenomeId()); ctx.getFileManager().addSequenceOutput(output2); @@ -1012,7 +1023,7 @@ public void processFilesRemote(List inputFiles, JobContext c File lifted = liftToHuman(ctx, primaryTrackVcf, sitesOnlyVcf, grch37Genome); SequenceOutputFile output3 = new SequenceOutputFile(); output3.setFile(lifted); - output3.setName("mGAP Release: " + releaseVersion + " Lifted to Human"); + output3.setName("mGAP Release: " + species + " " + releaseVersion + " Lifted to Human"); output3.setCategory((testOnly ? "Test " : "") + "mGAP Release Lifted to Human"); output3.setLibrary_id(grch37Genome.getGenomeId()); ctx.getFileManager().addSequenceOutput(output3); @@ -1111,16 +1122,6 @@ private File getSitesOnlyVcfName(File outDir, File primaryTrackVcf) return new File(outDir, SequenceAnalysisService.get().getUnzippedBaseName(primaryTrackVcf.getName()) + ".sitesOnly.vcf.gz"); } - private File getDroppedSitesVcfName(File outDir, File primaryTrackVcf) - { - return new File(outDir, SequenceAnalysisService.get().getUnzippedBaseName(primaryTrackVcf.getName()) + ".droppedFromPriorRelease.vcf.gz"); - } - - private File getNovelSitesVcfName(File outDir, File primaryTrackVcf) - { - return new File(outDir, SequenceAnalysisService.get().getUnzippedBaseName(primaryTrackVcf.getName()) + ".newToRelease.vcf.gz"); - } - private File getLiftedVcfName(File outDir, File primaryTrackVcf) { return new File(outDir, SequenceAnalysisService.get().getUnzippedBaseName(primaryTrackVcf.getName()) + ".liftToGRCh37.vcf.gz"); diff --git a/mGAP/src/org/labkey/mgap/query/SampleSummaryCustomizer.java b/mGAP/src/org/labkey/mgap/query/SampleSummaryCustomizer.java new file mode 100644 index 000000000..317773403 --- /dev/null +++ b/mGAP/src/org/labkey/mgap/query/SampleSummaryCustomizer.java @@ -0,0 +1,42 @@ +package org.labkey.mgap.query; + +import org.labkey.api.data.AbstractTableInfo; +import org.labkey.api.data.JdbcType; +import org.labkey.api.data.SQLFragment; +import org.labkey.api.data.TableInfo; +import org.labkey.api.gwt.client.FacetingBehaviorType; +import org.labkey.api.ldk.table.AbstractTableCustomizer; +import org.labkey.api.query.ExprColumn; + +public class SampleSummaryCustomizer extends AbstractTableCustomizer +{ + @Override + public void customize(TableInfo ti) + { + if (ti instanceof AbstractTableInfo ati) + { + customizeTable(ati); + } + } + + private void customizeTable(AbstractTableInfo ti) + { + String fieldName = "subjectCaseMismatch"; + if (ti.getColumn(fieldName) != null) + { + return; + } + + if (!ti.getSqlDialect().isSqlServer()) + { + return; + } + + SQLFragment sql = new SQLFragment("CASE WHEN HASHBYTES('sha1', " + ExprColumn.STR_TABLE_ALIAS + ".subjectId) = HASHBYTES('sha1', " + ExprColumn.STR_TABLE_ALIAS + ".aliasSubjectName) THEN NULL ELSE " + ExprColumn.STR_TABLE_ALIAS + ".aliasSubjectName END"); + ExprColumn col = new ExprColumn(ti, fieldName, sql, JdbcType.VARCHAR, ti.getColumn("subjectId"), ti.getColumn("aliasSubjectName")); + col.setLabel("Id Case Mismatch?"); + col.setFacetingBehaviorType(FacetingBehaviorType.ALWAYS_OFF); + col.setDescription("If the case of the subjectId differs from the alias table, the updated case is shown"); + ti.addColumn(col); + } +} diff --git a/mGAP/src/org/labkey/mgap/query/mGAPUserSchema.java b/mGAP/src/org/labkey/mgap/query/mGAPUserSchema.java index f960fa01a..cc947f79f 100644 --- a/mGAP/src/org/labkey/mgap/query/mGAPUserSchema.java +++ b/mGAP/src/org/labkey/mgap/query/mGAPUserSchema.java @@ -76,7 +76,20 @@ else if (mGAPSchema.TABLE_RELEASE_TRACKS.equalsIgnoreCase(name)) private TableInfo createWrappedVariantTable(String name, TableInfo sourceTable, ContainerFilter cf) { - return super.createWrappedTable(name, sourceTable, cf); + AbstractTableInfo ati = (AbstractTableInfo)super.createWrappedTable(name, sourceTable, cf); + + String fieldName = "versionAndSpecies"; + if (ati.getColumn(fieldName) == null) + { + SQLFragment sql = new SQLFragment("(" + ati.getSqlDialect().concatenate(ExprColumn.STR_TABLE_ALIAS + ".species", "': '", ExprColumn.STR_TABLE_ALIAS + ".version") + ")"); + ExprColumn col = new ExprColumn(ati, fieldName, sql, JdbcType.VARCHAR, ati.getColumn("version"), ati.getColumn("species")); + col.setLabel("Version and Species"); + col.setFacetingBehaviorType(FacetingBehaviorType.ALWAYS_OFF); + col.setDescription("This column shows the version and species"); + ati.addColumn(col); + } + + return ati; } private TableInfo customizeReleaseTracks(String name, TableInfo sourceTable, ContainerFilter cf) diff --git a/mcc/package-lock.json b/mcc/package-lock.json index 3655ca754..9091b05dc 100644 --- a/mcc/package-lock.json +++ b/mcc/package-lock.json @@ -4938,21 +4938,6 @@ "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", "dev": true }, - "node_modules/body-parser/node_modules/qs": { - "version": "6.13.0", - "resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz", - "integrity": "sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg==", - "dev": true, - "dependencies": { - "side-channel": "^1.0.6" - }, - "engines": { - "node": ">=0.6" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, "node_modules/bonjour-service": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/bonjour-service/-/bonjour-service-1.2.1.tgz", @@ -5554,10 +5539,11 @@ "integrity": "sha512-ASFBup0Mz1uyiIjANan1jzLQami9z1PoYSZCiiYW2FczPbenXc45FZdBZLzOT+r6+iciuEModtmCti+hjaAk0A==" }, "node_modules/cookie": { - "version": "0.6.0", - "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.6.0.tgz", - "integrity": "sha512-U71cyTamuh1CRNCfpGY6to28lxvNwPG4Guz/EVjgf3Jmzv0vlDp1atT9eS5dDjMYHucpHbWns6Lwf3BKz6svdw==", + "version": "0.7.1", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.1.tgz", + "integrity": "sha512-6DnInpx7SJ2AK3+CTUE/ZM0vWTUboZCegxhC2xiIydHR9jNuTAASBrfEpHhiGOZw/nX51bHt6YQl8jsGo4y/0w==", "dev": true, + "license": "MIT", "engines": { "node": ">= 0.6" } @@ -6587,24 +6573,25 @@ } }, "node_modules/express": { - "version": "4.20.0", - "resolved": "https://registry.npmjs.org/express/-/express-4.20.0.tgz", - "integrity": "sha512-pLdae7I6QqShF5PnNTCVn4hI91Dx0Grkn2+IAsMTgMIKuQVte2dN9PeGSSAME2FR8anOhVA62QDIUaWVfEXVLw==", + "version": "4.21.1", + "resolved": "https://registry.npmjs.org/express/-/express-4.21.1.tgz", + "integrity": "sha512-YSFlK1Ee0/GC8QaO91tHcDxJiE/X4FbpAyQWkxAvG6AXCuR65YzK8ua6D9hvi/TzUfZMpc+BwuM1IPw8fmQBiQ==", "dev": true, + "license": "MIT", "dependencies": { "accepts": "~1.3.8", "array-flatten": "1.1.1", "body-parser": "1.20.3", "content-disposition": "0.5.4", "content-type": "~1.0.4", - "cookie": "0.6.0", + "cookie": "0.7.1", "cookie-signature": "1.0.6", "debug": "2.6.9", "depd": "2.0.0", "encodeurl": "~2.0.0", "escape-html": "~1.0.3", "etag": "~1.8.1", - "finalhandler": "1.2.0", + "finalhandler": "1.3.1", "fresh": "0.5.2", "http-errors": "2.0.0", "merge-descriptors": "1.0.3", @@ -6613,11 +6600,11 @@ "parseurl": "~1.3.3", "path-to-regexp": "0.1.10", "proxy-addr": "~2.0.7", - "qs": "6.11.0", + "qs": "6.13.0", "range-parser": "~1.2.1", "safe-buffer": "5.2.1", "send": "0.19.0", - "serve-static": "1.16.0", + "serve-static": "1.16.2", "setprototypeof": "1.2.0", "statuses": "2.0.1", "type-is": "~1.6.18", @@ -6750,13 +6737,14 @@ } }, "node_modules/finalhandler": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.2.0.tgz", - "integrity": "sha512-5uXcUVftlQMFnWC9qu/svkWv3GTd2PfUhK/3PLkYNAe7FbqJMt3515HaxE6eRL74GdsriiwujiawdaB1BpEISg==", + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.3.1.tgz", + "integrity": "sha512-6BN9trH7bp3qvnrRyzsBz+g3lZxTNZTbVO2EV1CS0WIcDbawYVdYvGflME/9QP0h0pYlCDBCTjYa9nZzMDpyxQ==", "dev": true, + "license": "MIT", "dependencies": { "debug": "2.6.9", - "encodeurl": "~1.0.2", + "encodeurl": "~2.0.0", "escape-html": "~1.0.3", "on-finished": "2.4.1", "parseurl": "~1.3.3", @@ -6772,15 +6760,27 @@ "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", "dev": true, + "license": "MIT", "dependencies": { "ms": "2.0.0" } }, + "node_modules/finalhandler/node_modules/encodeurl": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz", + "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/finalhandler/node_modules/ms": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", - "dev": true + "dev": true, + "license": "MIT" }, "node_modules/find-cache-dir": { "version": "4.0.0", @@ -7857,10 +7857,11 @@ } }, "node_modules/http-proxy-middleware": { - "version": "2.0.6", - "resolved": "https://registry.npmjs.org/http-proxy-middleware/-/http-proxy-middleware-2.0.6.tgz", - "integrity": "sha512-ya/UeJ6HVBYxrgYotAZo1KvPWlgB48kUJLDePFeneHsVujFaW5WNj2NgWCAE//B1Dl02BIfYlpNgBy8Kf8Rjmw==", + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/http-proxy-middleware/-/http-proxy-middleware-2.0.7.tgz", + "integrity": "sha512-fgVY8AV7qU7z/MmXJ/rxwbrtQH4jBQ9m7kp3llF0liB7glmFeVZFBepQb32T3y8n8k2+AEYuMPCpinYW+/CuRA==", "dev": true, + "license": "MIT", "dependencies": { "@types/http-proxy": "^1.17.8", "http-proxy": "^1.18.1", @@ -11846,12 +11847,13 @@ ] }, "node_modules/qs": { - "version": "6.11.0", - "resolved": "https://registry.npmjs.org/qs/-/qs-6.11.0.tgz", - "integrity": "sha512-MvjoMCJwEarSbUYk5O+nmoSzSutSsTwF85zcHPQ9OrlFoZOYIjaqBAJIqIXjptyD5vThxGq52Xu/MaJzRkIk4Q==", + "version": "6.13.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz", + "integrity": "sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg==", "dev": true, + "license": "BSD-3-Clause", "dependencies": { - "side-channel": "^1.0.4" + "side-channel": "^1.0.6" }, "engines": { "node": ">=0.6" @@ -12946,63 +12948,29 @@ } }, "node_modules/serve-static": { - "version": "1.16.0", - "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.16.0.tgz", - "integrity": "sha512-pDLK8zwl2eKaYrs8mrPZBJua4hMplRWJ1tIFksVC3FtBEBnl8dxgeHtsaMS8DhS9i4fLObaon6ABoc4/hQGdPA==", + "version": "1.16.2", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.16.2.tgz", + "integrity": "sha512-VqpjJZKadQB/PEbEwvFdO43Ax5dFBZ2UECszz8bQ7pi7wt//PWe1P6MN7eCnjsatYtBT6EuiClbjSWP2WrIoTw==", "dev": true, + "license": "MIT", "dependencies": { - "encodeurl": "~1.0.2", + "encodeurl": "~2.0.0", "escape-html": "~1.0.3", "parseurl": "~1.3.3", - "send": "0.18.0" + "send": "0.19.0" }, "engines": { "node": ">= 0.8.0" } }, - "node_modules/serve-static/node_modules/debug": { - "version": "2.6.9", - "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", - "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", - "dev": true, - "dependencies": { - "ms": "2.0.0" - } - }, - "node_modules/serve-static/node_modules/debug/node_modules/ms": { + "node_modules/serve-static/node_modules/encodeurl": { "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", - "dev": true - }, - "node_modules/serve-static/node_modules/ms": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "dev": true - }, - "node_modules/serve-static/node_modules/send": { - "version": "0.18.0", - "resolved": "https://registry.npmjs.org/send/-/send-0.18.0.tgz", - "integrity": "sha512-qqWzuOjSFOuqPjFe4NOsMLafToQQwBSOEpS+FwEt3A2V3vKubTquT3vmLTQpFgMXp8AlFWFuP1qKaJZOtPpVXg==", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz", + "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==", "dev": true, - "dependencies": { - "debug": "2.6.9", - "depd": "2.0.0", - "destroy": "1.2.0", - "encodeurl": "~1.0.2", - "escape-html": "~1.0.3", - "etag": "~1.8.1", - "fresh": "0.5.2", - "http-errors": "2.0.0", - "mime": "1.6.0", - "ms": "2.1.3", - "on-finished": "2.4.1", - "range-parser": "~1.2.1", - "statuses": "2.0.1" - }, + "license": "MIT", "engines": { - "node": ">= 0.8.0" + "node": ">= 0.8" } }, "node_modules/set-blocking": { diff --git a/mcc/src/client/U24Dashboard/Dashboard.tsx b/mcc/src/client/U24Dashboard/Dashboard.tsx index 7acbdd8e2..8819a0789 100644 --- a/mcc/src/client/U24Dashboard/Dashboard.tsx +++ b/mcc/src/client/U24Dashboard/Dashboard.tsx @@ -63,24 +63,29 @@ export function Dashboard() { scope: this }); - Query.selectRows({ - containerPath: requestContainerPath, - schemaName: 'mcc', - queryName: 'requestScores', - columns: 'requestId/status', - success: function(results) { - if (isApiSubscribed) { - setRequestRows(results.rows); - } - }, - failure: function(response) { - if (isApiSubscribed) { - alert('There was an error loading data'); - console.error(response); - } - }, - scope: this - }); + if (ctx.hasRequestReadPermission) { + Query.selectRows({ + containerPath: requestContainerPath, + schemaName: 'mcc', + queryName: 'requestScores', + columns: 'requestId/status', + success: function (results) { + if (isApiSubscribed) { + setRequestRows(results.rows); + } + }, + failure: function (response) { + if (isApiSubscribed) { + alert('There was an error loading data'); + console.error(response); + } + }, + scope: this + }); + } + else { + setRequestRows([]) + } Query.selectRows({ containerPath: containerPath, diff --git a/mcc/src/org/labkey/mcc/MccModule.java b/mcc/src/org/labkey/mcc/MccModule.java index 673b3d950..3a0c891cd 100644 --- a/mcc/src/org/labkey/mcc/MccModule.java +++ b/mcc/src/org/labkey/mcc/MccModule.java @@ -56,6 +56,7 @@ import org.labkey.mcc.security.MccRabReviewerRole; import org.labkey.mcc.security.MccRequestAdminPermission; import org.labkey.mcc.security.MccRequesterRole; +import org.labkey.mcc.security.MccViewRequestsPermission; import java.util.Collection; import java.util.Collections; @@ -109,6 +110,7 @@ public JSONObject getPageContextJson(ContainerUser context) Container requestContainer = MccManager.get().getMCCRequestContainer(context.getContainer()); ret.put("hasRequestAdminPermission", requestContainer != null && requestContainer.hasPermission(context.getUser(), MccRequestAdminPermission.class)); + ret.put("hasRequestReadPermission", requestContainer != null && requestContainer.hasPermission(context.getUser(), MccViewRequestsPermission.class)); ret.put("hasRabPermission", requestContainer != null && requestContainer.hasPermission(context.getUser(), MccRabReviewPermission.class)); ret.put("hasFinalDecisionPermission", requestContainer != null && requestContainer.hasPermission(context.getUser(), MccFinalReviewPermission.class)); diff --git a/primeseq/src/org/labkey/primeseq/PrimeseqController.java b/primeseq/src/org/labkey/primeseq/PrimeseqController.java index d93d71258..3606da7ef 100644 --- a/primeseq/src/org/labkey/primeseq/PrimeseqController.java +++ b/primeseq/src/org/labkey/primeseq/PrimeseqController.java @@ -33,7 +33,12 @@ import org.labkey.api.data.ContainerType; import org.labkey.api.data.DbScope; import org.labkey.api.data.SQLFragment; +import org.labkey.api.data.SimpleFilter; import org.labkey.api.data.SqlExecutor; +import org.labkey.api.data.TableSelector; +import org.labkey.api.exp.api.ExpData; +import org.labkey.api.exp.api.ExpRun; +import org.labkey.api.exp.api.ExperimentService; import org.labkey.api.module.Module; import org.labkey.api.module.ModuleLoader; import org.labkey.api.pipeline.PipeRoot; @@ -42,10 +47,13 @@ import org.labkey.api.pipeline.PipelineService; import org.labkey.api.pipeline.PipelineStatusFile; import org.labkey.api.pipeline.PipelineUrls; +import org.labkey.api.query.FieldKey; +import org.labkey.api.query.QueryService; import org.labkey.api.security.RequiresPermission; import org.labkey.api.security.RequiresSiteAdmin; import org.labkey.api.security.permissions.ReadPermission; import org.labkey.api.security.permissions.UpdatePermission; +import org.labkey.api.sequenceanalysis.SequenceOutputFile; import org.labkey.api.sequenceanalysis.pipeline.HasJobParams; import org.labkey.api.sequenceanalysis.pipeline.JobResourceSettings; import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; @@ -795,4 +803,77 @@ public void setRestartJobs(boolean restartJobs) _restartJobs = restartJobs; } } + + @RequiresSiteAdmin + public static class FixSbtAction extends ConfirmAction + { + @Override + public ModelAndView getConfirmView(Object o, BindException errors) throws Exception + { + setTitle("Fix SBT Errors"); + + return new HtmlView(HtmlString.of("This will update filepaths on SBT outputs. Do you want to continue?")); + } + + @Override + public boolean handlePost(Object o, BindException errors) throws Exception + { + new TableSelector(QueryService.get().getUserSchema(getUser(), getContainer(), "sequenceanalysis").getTable("outputfiles"), PageFlowUtil.set("rowid"), new SimpleFilter(FieldKey.fromString("category"), "SBT Results"), null).forEachResults(rs -> { + SequenceOutputFile so = SequenceOutputFile.getForId(rs.getInt(FieldKey.fromString("rowid"))); + + File f = so.getFile(); + if (f.exists()) + { + return; + } + + ExpRun run = ExperimentService.get().getExpRun(so.getRunId()); + PipelineStatusFile sf = PipelineService.get().getStatusFile(run.getJobId()); + File logFile = new File(sf.getFilePath()); + File root = logFile.getParentFile(); + File [] dirs = root.listFiles(fn -> { + return fn.isDirectory() & !fn.getName().equalsIgnoreCase("Shared"); + }); + + if (dirs == null || dirs.length == 0) + { + _log.error("Unable to file directory for: " + f.getPath()); + return; + } + + File parent = new File(dirs[0], "Alignment"); + File [] children = parent.listFiles(fn -> { + return fn.getName().endsWith(".sbt_hits.txt.gz"); + }); + + if (children == null || children.length != 1) + { + _log.error("Unable to file child under: " + parent.getPath()); + return; + } + + _log.info("Found: " + children[0].getPath()); + + ExpData d = so.getExpData(); + d.setDataFileURI(children[0].toURI()); + + d.save(getUser()); + }); + + return true; + } + + @Override + public void validateCommand(Object o, Errors errors) + { + + } + + @NotNull + @Override + public URLHelper getSuccessURL(Object o) + { + return PageFlowUtil.urlProvider(PipelineUrls.class).urlBegin(getContainer()); + } + } } \ No newline at end of file diff --git a/primeseq/src/org/labkey/primeseq/pipeline/ExacloudResourceSettings.java b/primeseq/src/org/labkey/primeseq/pipeline/ExacloudResourceSettings.java index 5158b5783..e7eaf6c75 100644 --- a/primeseq/src/org/labkey/primeseq/pipeline/ExacloudResourceSettings.java +++ b/primeseq/src/org/labkey/primeseq/pipeline/ExacloudResourceSettings.java @@ -3,12 +3,17 @@ import org.json.JSONObject; import org.labkey.api.data.Container; import org.labkey.api.module.ModuleLoader; +import org.labkey.api.pipeline.PipeRoot; +import org.labkey.api.pipeline.PipelineService; import org.labkey.api.sequenceanalysis.pipeline.JobResourceSettings; import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; import org.labkey.primeseq.PrimeseqModule; import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; import java.util.List; +import java.util.Set; /** * Created by bimber on 9/30/2016. @@ -43,4 +48,37 @@ public boolean isAvailable(Container c) { return c.getActiveModules().contains(ModuleLoader.getInstance().getModule(PrimeseqModule.class)); } + + @Override + public Collection getDockerVolumes(Container c) + { + Set volumes = new HashSet<>(); + volumes.add("/home/groups/prime-seq"); + volumes.add("/home/exacloud/gscratch"); + + PipeRoot pr = PipelineService.get().findPipelineRoot(c); + if (pr != null && pr.getRootPath().exists()) + { + if (pr.getRootPath().getPath().startsWith("/home/groups/")) + { + String folderName = pr.getRootPath().getPath().replaceAll("^/home/groups/", "").split("/")[0]; + volumes.add("/home/groups/" + folderName); + } + } + + if (c.isWorkbook()) + { + PipeRoot pr2 = PipelineService.get().findPipelineRoot(c.getParent()); + if (pr2 != null && pr2.getRootPath().exists()) + { + if (pr2.getRootPath().getPath().startsWith("/home/groups/")) + { + String folderName = pr2.getRootPath().getPath().replaceAll("^/home/groups/", "").split("/")[0]; + volumes.add("/home/groups/" + folderName); + } + } + } + + return volumes; + } }