From c1d4950159259b94337848996d118bdd7eb9847a Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 2 Nov 2024 07:12:59 -0700 Subject: [PATCH 01/37] Test fix --- .../org/labkey/test/tests/external/labModules/JBrowseTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jbrowse/test/src/org/labkey/test/tests/external/labModules/JBrowseTest.java b/jbrowse/test/src/org/labkey/test/tests/external/labModules/JBrowseTest.java index b3f7219bb..a59616c72 100644 --- a/jbrowse/test/src/org/labkey/test/tests/external/labModules/JBrowseTest.java +++ b/jbrowse/test/src/org/labkey/test/tests/external/labModules/JBrowseTest.java @@ -286,7 +286,7 @@ private void testFilterWidget() sleep(1000); // NOTE: depending on the size of the view area, this can vary. This is more a factor of the environment that actual behavior - Assert.assertEquals("Incorrect number of variants", 87.0, getTotalVariantFeatures(), 1.0); + Assert.assertEquals("Incorrect number of variants", 85.0, getTotalVariantFeatures(), 1.0); // bottom filter UI waitForElement(Locator.tagContainingText("button", "mGAP: Showing sites where").containing("AF < 0.02")); From 3252d130d25584328df20fa111839b7c51e00605 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 2 Nov 2024 09:52:10 -0700 Subject: [PATCH 02/37] Test fix --- .../org/labkey/test/tests/external/labModules/JBrowseTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jbrowse/test/src/org/labkey/test/tests/external/labModules/JBrowseTest.java b/jbrowse/test/src/org/labkey/test/tests/external/labModules/JBrowseTest.java index a59616c72..008634baf 100644 --- a/jbrowse/test/src/org/labkey/test/tests/external/labModules/JBrowseTest.java +++ b/jbrowse/test/src/org/labkey/test/tests/external/labModules/JBrowseTest.java @@ -1808,7 +1808,7 @@ private void testLuceneSearchUI(String sessionId) waitForElement(Locator.tagWithText("li", "does not equal")).click(); waitForElement(Locator.tagWithId("input", "value-select-0")).sendKeys("A"); waitAndClick(Locator.tagWithClass("button", "filter-form-select-button")); - waitForElement(Locator.tagWithText("span", "858")); + waitForElement(Locator.tagWithText("span", "711")); clearFilterDialog("ref does not equal A"); // Alt Allele contains TT From 6b7690de635c5cd69f1b0c6a966714fccb586d24 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 5 Nov 2024 08:06:48 -0800 Subject: [PATCH 03/37] Attempt to fix JBrowseTest (#303) * Attempt to fix JBrowseTest --- .../tests/external/labModules/JBrowseTest.java | 2 +- .../external/labModules/JBrowseTestHelper.java | 17 ++++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/jbrowse/test/src/org/labkey/test/tests/external/labModules/JBrowseTest.java b/jbrowse/test/src/org/labkey/test/tests/external/labModules/JBrowseTest.java index 008634baf..a6055d232 100644 --- a/jbrowse/test/src/org/labkey/test/tests/external/labModules/JBrowseTest.java +++ b/jbrowse/test/src/org/labkey/test/tests/external/labModules/JBrowseTest.java @@ -286,7 +286,7 @@ private void testFilterWidget() sleep(1000); // NOTE: depending on the size of the view area, this can vary. This is more a factor of the environment that actual behavior - Assert.assertEquals("Incorrect number of variants", 85.0, getTotalVariantFeatures(), 1.0); + Assert.assertEquals("Incorrect number of variants", 87.0, getTotalVariantFeatures(), 1.0); // bottom filter UI waitForElement(Locator.tagContainingText("button", "mGAP: Showing sites where").containing("AF < 0.02")); diff --git a/jbrowse/test/src/org/labkey/test/tests/external/labModules/JBrowseTestHelper.java b/jbrowse/test/src/org/labkey/test/tests/external/labModules/JBrowseTestHelper.java index 596d64e9d..b4efebabc 100644 --- a/jbrowse/test/src/org/labkey/test/tests/external/labModules/JBrowseTestHelper.java +++ b/jbrowse/test/src/org/labkey/test/tests/external/labModules/JBrowseTestHelper.java @@ -6,6 +6,7 @@ import org.labkey.test.Locator; import org.labkey.test.TestFileUtils; import org.labkey.test.WebDriverWrapper; +import org.labkey.test.categories.Base; import org.labkey.test.components.ext4.Window; import org.labkey.test.util.DataRegionTable; import org.labkey.test.util.Ext4Helper; @@ -166,19 +167,29 @@ public static void waitForJBrowseToLoad(BaseWebDriverTest test) public static long getTotalVariantFeatures(BaseWebDriverTest test) { - Locator l = Locator.tagWithAttribute("svg", "data-testid", "svgfeatures").append(Locator.tag("polygon")); + final Long winWidth = test.executeScript("return window.outerWidth", Long.class); + final Long winHeight = test.executeScript("return window.outerHeight", Long.class); + final Locator l = Locator.tagWithAttribute("svg", "data-testid", "svgfeatures").append(Locator.tag("polygon")); try { // NOTE: JBrowse renders features using multiple blocks per track, and these tracks can redundantly render identical features on top of one another. // Counting unique locations is indirect, but should result in unique features - return Locator.findElements(test.getDriver(), l).stream().filter(WebElement::isDisplayed).map(WebElement::getLocation).distinct().count(); + return doVariantCount(test, l, winWidth, winHeight); } catch (StaleElementReferenceException e) { test.log("Stale elements, retrying"); WebDriverWrapper.sleep(5000); - return Locator.findElements(test.getDriver(), l).stream().filter(WebElement::isDisplayed).map(WebElement::getLocation).distinct().count(); + return doVariantCount(test, l, winWidth, winHeight); } } + + private static long doVariantCount(BaseWebDriverTest test, Locator l, long winWidth, long winHeight) + { + return Locator.findElements(test.getDriver(), l).stream().filter(WebElement::isDisplayed).map(WebElement::getRect).distinct().filter( + // This is designed to limit to just elements within the viewport: + rec -> rec.x > 0 & rec.x <= winWidth & rec.y > 0 & rec.y <= winHeight + ).count(); + } } From 7746b2b1c61cf45711737d2fafdab727a1c72431 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 6 Nov 2024 07:34:12 -0800 Subject: [PATCH 04/37] Ensure merge.done file is unique for the input VCF --- .../src/org/labkey/sequenceanalysis/util/SequenceUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java index 0fbd9a075..6289f959e 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java @@ -515,7 +515,7 @@ else if (!samples.equals(header.getGenotypeSamples())) writer.write("} | bgzip -f" + (compressionLevel == null ? "" : " --compress-level 9") + (threads == null ? "" : " --threads " + threads) + " > '" + outputGzip.getPath() + "'\n"); } - File mergeDone = new File(outputGzip.getParentFile(), "merge.done"); + File mergeDone = new File(outputGzip.getPath() + ".merge.done"); if (mergeDone.exists()) { log.debug("Merge done file exists, will not repeat merge"); From 2a6016ab4df17e2ddb4f21fed0f9fabf3302a27a Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 7 Nov 2024 10:11:06 -0800 Subject: [PATCH 05/37] Update nimble settings --- singlecell/src/org/labkey/singlecell/run/NimbleHelper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index 891748ca0..754fdc78a 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -361,7 +361,7 @@ else if ("strict".equals(alignTemplate)) config.put("num_mismatches", 0); config.put("intersect_level", 0); // NOTE: this allows a small amount of mismatched ends: - config.put("score_percent", 0.90); + config.put("score_percent", 0.99); config.put("score_threshold", 45); config.put("score_filter", 25); } From fe0661a7b845863fc8e86cbd0af09afaec2854a4 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 9 Nov 2024 07:44:03 -0800 Subject: [PATCH 06/37] Minor code cleanup --- .../analysis/UpdateReadsetFilesHandler.java | 8 ++++---- .../sequenceanalysis/pipeline/CacheGenomePipelineJob.java | 2 +- .../resources/web/singlecell/panel/PoolImportPanel.js | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java index 9616213a2..c04faaf95 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java @@ -131,18 +131,18 @@ private SAMFileHeader getAndValidateHeaderForBam(SequenceOutputFile so, String n Set distinctLibraries = rgs.stream().map(SAMReadGroupRecord::getLibrary).collect(Collectors.toSet()); if (distinctLibraries.size() > 1) { - throw new PipelineJobException("File has more than one library in read group(s), found: " + distinctLibraries.stream().collect(Collectors.joining(", "))); + throw new PipelineJobException("File has more than one library in read group(s), found: " + String.join(", ", distinctLibraries)); } Set distinctSamples = rgs.stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet()); if (distinctSamples.size() > 1) { - throw new PipelineJobException("File has more than one sample in read group(s), found: " + distinctSamples.stream().collect(Collectors.joining(", "))); + throw new PipelineJobException("File has more than one sample in read group(s), found: " + String.join(", ", distinctSamples)); } if ( - distinctLibraries.stream().filter(x -> !x.equals(newRsName)).count() == 0L && - distinctSamples.stream().filter(x -> !x.equals(newRsName)).count() == 0L + distinctLibraries.stream().allMatch(x -> x.equals(newRsName)) && + distinctSamples.stream().allMatch(x -> x.equals(newRsName)) ) { throw new PipelineJobException("Sample and library names match in read group(s), aborting"); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CacheGenomePipelineJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CacheGenomePipelineJob.java index b019b876c..f59d13986 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CacheGenomePipelineJob.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CacheGenomePipelineJob.java @@ -146,7 +146,7 @@ public List getProtocolActionNames() } @Override - public PipelineJob.Task createTask(PipelineJob job) + public PipelineJob.Task createTask(PipelineJob job) { return new CacheGenomesTask(this, job); } diff --git a/singlecell/resources/web/singlecell/panel/PoolImportPanel.js b/singlecell/resources/web/singlecell/panel/PoolImportPanel.js index b2f56edfc..8cf871dda 100644 --- a/singlecell/resources/web/singlecell/panel/PoolImportPanel.js +++ b/singlecell/resources/web/singlecell/panel/PoolImportPanel.js @@ -299,7 +299,7 @@ Ext4.define('SingleCell.panel.PoolImportPanel', { }, hto: function(val, panel){ - if (val === 'N/A' || val === 'NA') { + if (val === 'N/A' || val === 'NA' || val === 'N') { return null; } From bf4ae47053f3a46bc7a4e300f2783cbe2279a7d4 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 11 Nov 2024 06:49:10 -0800 Subject: [PATCH 07/37] Bugfix to nimble -v command --- .../src/org/labkey/singlecell/run/NimbleHelper.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index 754fdc78a..38939042b 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -711,10 +711,15 @@ private String getVersion(PipelineStepOutput output) throws PipelineJobException throw new PipelineJobException("Unable to find file: " + outFile.getPath()); } - String ret; - try (BufferedReader reader = Readers.getReader(outFile)) + String ret = null; + try { - ret = reader.readLine(); + ret = StringUtils.trimToNull(Files.readString(outFile.toPath())); + if (ret == null) + { + throw new PipelineJobException("nimble -v did not output version"); + } + ret = ret.replaceAll("nimble", "").replaceAll("[\\r\\n]+", ""); } catch (IOException e) { From 5d58e85f62db210ccd52090e375161930dcfb946 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 11 Nov 2024 07:39:51 -0800 Subject: [PATCH 08/37] Support nimble HTML report --- .../labkey/singlecell/run/NimbleHelper.java | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index 38939042b..932c47c7c 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -290,6 +290,13 @@ public void doNimbleAlign(File bam, PipelineStepOutput output, Readset rs, Strin output.addSequenceOutput(results, basename + ": nimble align", "Nimble Alignment", rs.getRowId(), null, genome.getGenomeId(), description); + File reportHtml = getReportHtmlFileFromResults(results); + if (!reportHtml.exists()) + { + throw new PipelineJobException("Unable to find file: " + reportHtml.getPath()); + } + output.addSequenceOutput(results, basename + ": nimble report", "Nimble Report", rs.getRowId(), null, genome.getGenomeId(), description); + File outputBam = new File(results.getPath().replaceAll("results." + genome.genomeId + ".txt.gz", "nimbleAlignment." + genome.genomeId + ".bam")); if (outputBam.exists()) { @@ -497,11 +504,42 @@ private Map doAlignment(List genomes, List plotArgs = new ArrayList<>(); + plotArgs.add("python3"); + plotArgs.add("-m"); + plotArgs.add("nimble"); + + plotArgs.add("plot"); + plotArgs.add("--input_file"); + plotArgs.add("/work/" + reportResultsGz.getName()); + + File plotResultsHtml = getReportHtmlFileFromResults(reportResultsGz); + if (reportResultsGz.exists()) + { + plotResultsHtml.delete(); + } + + plotArgs.add("--output_file"); + plotArgs.add("/work/" + plotResultsHtml.getName()); + + runUsingDocker(plotArgs, output, null); + + if (!plotResultsHtml.exists()) + { + throw new PipelineJobException("Missing file: " + plotResultsHtml.getPath()); + } } return resultMap; } + private File getReportHtmlFileFromResults(File reportResultsGz) + { + return new File(reportResultsGz.getPath().replaceAll("txt.gz$", "html$")); + } + private File getNimbleDoneFile(File parentDir, String resumeString) { return new File(parentDir, "nimble." + resumeString + ".done"); From cfef61ede99647386d778a8374bf58df3432561f Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 11 Nov 2024 16:41:31 -0800 Subject: [PATCH 09/37] Improve logging --- .../analysis/UpdateReadsetFilesHandler.java | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java index c04faaf95..06826bc36 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java @@ -107,11 +107,11 @@ public void init(JobContext ctx, List inputFiles, List rgs = header.getReadGroups(); String existingSample = rgs.get(0).getSample(); From 7300d1e69a6ad26eb7838fd4ae89c21c85fc5c15 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 11 Nov 2024 16:46:02 -0800 Subject: [PATCH 10/37] Add ability to force cellranger VDJ version 7 --- .../singlecell/run/CellRangerVDJWrapper.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java index 0b1b9ba8f..36525672c 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java @@ -84,7 +84,8 @@ public VDJProvider() put("height", 100); put("width", 400); put("allowBlank", false); - }}, null) + }}, null), + ToolParameterDescriptor.create("useCellRanger7", "Use cellranger 7", "If checked, this will use cellranger 7, rather than the current version", "checkbox", null, false) ), null, "https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger", true, false, false, ALIGNMENT_MODE.MERGE_THEN_ALIGN); } @@ -274,7 +275,7 @@ public AlignmentStep.IndexOutput createIndex(ReferenceGenome referenceGenome, Fi output.addInput(getGenomeFasta(), "Input FASTA"); List args = new ArrayList<>(); - args.add(getWrapper().getExe().getPath()); + args.add(getWrapper().getExe(false).getPath()); args.add("mkvdjref"); args.add("--seqs=" + getGenomeFasta().getPath()); args.add("--genome=" + indexDir.getName()); @@ -301,8 +302,10 @@ public AlignmentStep.AlignmentOutput performAlignment(Readset rs, List inp { AlignmentOutputImpl output = new AlignmentOutputImpl(); + boolean useCellRanger7 = getProvider().getParameterByName("useCellRanger7").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false); + List args = new ArrayList<>(); - args.add(getWrapper().getExe().getPath()); + args.add(getWrapper().getExe(useCellRanger7).getPath()); args.add("multi"); args.add("--disable-ui"); @@ -830,9 +833,9 @@ private String getSampleName(String fn) } } - protected File getExe() + protected File getExe(boolean useCellRanger7) { - return SequencePipelineService.get().getExeForPackage("CELLRANGERPATH", "cellranger"); + return SequencePipelineService.get().getExeForPackage("CELLRANGERPATH", useCellRanger7 ? "cellranger7": "cellranger"); } private static void processAndMergeCSVs(File abCSV, File gdCSV, Logger log) throws PipelineJobException @@ -977,7 +980,7 @@ else if (uniqueChains.size() == 2) { try { - String ret = executeWithOutput(Arrays.asList(getExe().getPath(), "--version")); + String ret = executeWithOutput(Arrays.asList(getExe(false).getPath(), "--version")); return ret.replaceAll("^cellranger ", ""); } From a140399e94d45894b0d83bbfafe07e5c352e7e23 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 11 Nov 2024 20:12:19 -0800 Subject: [PATCH 11/37] Constrain popup dialog height in JBrowse (#300) * Constrain popup dialog height in jbrowse * Fix vertical height calculation for correct overflow --------- Co-authored-by: Sebastian Benjamin --- .../ExtendedVariantWidget/ExtendedVariantWidget.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jbrowse/src/client/JBrowse/Browser/plugins/ExtendedVariantPlugin/ExtendedVariantWidget/ExtendedVariantWidget.tsx b/jbrowse/src/client/JBrowse/Browser/plugins/ExtendedVariantPlugin/ExtendedVariantWidget/ExtendedVariantWidget.tsx index 1fe878691..3c3ddced5 100644 --- a/jbrowse/src/client/JBrowse/Browser/plugins/ExtendedVariantPlugin/ExtendedVariantWidget/ExtendedVariantWidget.tsx +++ b/jbrowse/src/client/JBrowse/Browser/plugins/ExtendedVariantPlugin/ExtendedVariantWidget/ExtendedVariantWidget.tsx @@ -407,7 +407,7 @@ export default jbrowse => { feat["INFO"] = null return ( - + {message} Date: Tue, 12 Nov 2024 06:28:23 -0800 Subject: [PATCH 12/37] Bugfix to nimble version reporting --- singlecell/src/org/labkey/singlecell/run/NimbleHelper.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index 932c47c7c..41d288d7b 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -737,9 +737,7 @@ public Integer getNumMismatches() private String getVersion(PipelineStepOutput output) throws PipelineJobException { List nimbleArgs = new ArrayList<>(); - nimbleArgs.add("/bin/bash"); - nimbleArgs.add("-c"); - nimbleArgs.add("python3 -m nimble -v > /work/nimbleVersion.txt"); + nimbleArgs.add("/bin/bash -c 'python3 -m nimble -v' > /work/nimbleVersion.txt"); runUsingDocker(nimbleArgs, output, null); From fb878fe20d17cb50315ef0ec8f86d7b668d830ca Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 12 Nov 2024 10:23:23 -0800 Subject: [PATCH 13/37] Add check for commas in start/stop --- jbrowse/resources/views/genotypeTable.html | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/jbrowse/resources/views/genotypeTable.html b/jbrowse/resources/views/genotypeTable.html index 35dd8413e..ce6945c61 100644 --- a/jbrowse/resources/views/genotypeTable.html +++ b/jbrowse/resources/views/genotypeTable.html @@ -5,6 +5,24 @@ var start = LABKEY.ActionURL.getParameter("start"); var stop = LABKEY.ActionURL.getParameter("stop"); + if (!start || !stop) { + alert('Must provide a start and stop!') + return + } + + if (isNaN(start)) { + start = String(start).replaceAll(',', '') + } + + if (isNaN(stop)) { + stop = String(stop).replaceAll(',', '') + } + + if (isNaN(start) || isNaN(stop)) { + alert('Start and stop must be integers!') + return + } + LABKEY.Ajax.request({ url: LABKEY.ActionURL.buildURL('jbrowse', 'getGenotypes', null), method: 'POST', From 12221857c62b3a5ca3a0edfb5a2ddf69ef72b20b Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 12 Nov 2024 14:00:51 -0800 Subject: [PATCH 14/37] Allow per-library setting for retaining ambiguous features --- singlecell/resources/chunks/AppendNimble.R | 5 ++-- .../web/singlecell/panel/NimbleAppendPanel.js | 19 +++++++++++---- .../pipeline/singlecell/AppendNimble.java | 23 +++++++++++++++---- .../labkey/singlecell/run/NimbleHelper.java | 4 ++-- 4 files changed, 37 insertions(+), 14 deletions(-) diff --git a/singlecell/resources/chunks/AppendNimble.R b/singlecell/resources/chunks/AppendNimble.R index 39b5d1635..9b88e2700 100644 --- a/singlecell/resources/chunks/AppendNimble.R +++ b/singlecell/resources/chunks/AppendNimble.R @@ -7,8 +7,8 @@ invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) # NOTE: this file is created by DownloadAndAppendNimble if there was an error. It might exist if a job failed and then was restarted -if (file.exists('debug.nimble.txt')) { - unlink('debug.nimble.txt') +if (file.exists('debug.nimble.txt.gz')) { + unlink('debug.nimble.txt.gz') } for (datasetId in names(seuratObjects)) { @@ -16,6 +16,7 @@ for (datasetId in names(seuratObjects)) { seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) for (genomeId in names(nimbleGenomes)) { + retainAmbiguousFeatures <- !nimbleGenomeAmbiguousPreference[[genomeId]] seuratObj <- Rdiscvr::DownloadAndAppendNimble(seuratObject = seuratObj, allowableGenomes = genomeId, ensureSamplesShareAllGenomes = ensureSamplesShareAllGenomes, targetAssayName = nimbleGenomes[[genomeId]], enforceUniqueFeatureNames = TRUE, dropAmbiguousFeatures = !retainAmbiguousFeatures, maxLibrarySizeRatio = maxLibrarySizeRatio) } diff --git a/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js b/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js index 30dc0cd5c..9d569c22f 100644 --- a/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js +++ b/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js @@ -10,7 +10,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { initComponent: function(){ Ext4.apply(this, { style: 'padding: 10px;margins: 5px;', - minWidth: 650, + minWidth: 850, border: true, items: [{ html: 'This step will query nimble results for the selected genome(s). It will then append these results to the seurat object on the target assay.', @@ -20,7 +20,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { },{ xtype: 'ldk-gridpanel', clicksToEdit: 1, - width: 600, + width: 775, tbar: [{ text: 'Add', handler: function(btn){ @@ -40,7 +40,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { },LABKEY.ext4.GRIDBUTTONS.DELETERECORD()], store: { type: 'array', - fields: ['genomeId', 'targetAssay'] + fields: ['genomeId', 'targetAssay','retainAmbiguousFeatures'] }, columns: [{ dataIndex: 'genomeId', @@ -68,6 +68,14 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { xtype: 'textfield', allowBlank: false } + },{ + dataIndex: 'retainAmbiguousFeatures', + width: 175, + header: 'Retain Ambiguous Features?', + editor: { + xtype: 'checkbox', + allowBlank: false + } }] }] }); @@ -78,7 +86,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { getValue: function(){ var ret = []; this.down('ldk-gridpanel').store.each(function(r, i) { - ret.push([r.data.genomeId, r.data.targetAssay]); + ret.push([r.data.genomeId, r.data.targetAssay, !!r.data.retainAmbiguousFeatures]); }, this); return Ext4.isEmpty(ret) ? null : JSON.stringify(ret); @@ -113,7 +121,8 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { Ext4.Array.forEach(val, function(row){ var rec = grid.store.createModel({ genomeId: row[0], - targetAssay: row[1] + targetAssay: row[1], + retainAmbiguousFeatures: !!row[2] }); grid.store.add(rec); }, this); diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java index bdfe7cb04..89317c083 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java @@ -32,10 +32,6 @@ public Provider() {{ put("allowBlank", false); }}, null), - SeuratToolParameter.create("retainAmbiguousFeatures", "Retain Ambiguous Features", "If checked, features hitting more than one reference will be retained", "checkbox", new JSONObject() - {{ - put("check", false); - }}, false, null, true), SeuratToolParameter.create("ensureSamplesShareAllGenomes", "Ensure Samples Share All Genomes", "If checked, the job will fail unless nimble data is found for each requested genome for all samples", "checkbox", new JSONObject() {{ put("check", true); @@ -76,7 +72,7 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List doAlignment(List genomes, List Date: Tue, 12 Nov 2024 15:47:09 -0800 Subject: [PATCH 15/37] More granular control over NimbleAppend --- singlecell/resources/chunks/AppendNimble.R | 4 ++-- .../web/singlecell/panel/NimbleAppendPanel.js | 15 ++++++++------- .../pipeline/singlecell/AppendNimble.java | 15 +++++++++++++-- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/singlecell/resources/chunks/AppendNimble.R b/singlecell/resources/chunks/AppendNimble.R index 9b88e2700..040317e71 100644 --- a/singlecell/resources/chunks/AppendNimble.R +++ b/singlecell/resources/chunks/AppendNimble.R @@ -16,8 +16,8 @@ for (datasetId in names(seuratObjects)) { seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) for (genomeId in names(nimbleGenomes)) { - retainAmbiguousFeatures <- !nimbleGenomeAmbiguousPreference[[genomeId]] - seuratObj <- Rdiscvr::DownloadAndAppendNimble(seuratObject = seuratObj, allowableGenomes = genomeId, ensureSamplesShareAllGenomes = ensureSamplesShareAllGenomes, targetAssayName = nimbleGenomes[[genomeId]], enforceUniqueFeatureNames = TRUE, dropAmbiguousFeatures = !retainAmbiguousFeatures, maxLibrarySizeRatio = maxLibrarySizeRatio) + maxAmbiguityAllowed <- !nimbleGenomeAmbiguousPreference[[genomeId]] + seuratObj <- Rdiscvr::DownloadAndAppendNimble(seuratObject = seuratObj, allowableGenomes = genomeId, ensureSamplesShareAllGenomes = ensureSamplesShareAllGenomes, targetAssayName = nimbleGenomes[[genomeId]], enforceUniqueFeatureNames = TRUE, maxAmbiguityAllowed = maxAmbiguityAllowed, maxLibrarySizeRatio = maxLibrarySizeRatio) } saveData(seuratObj, datasetId) diff --git a/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js b/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js index 9d569c22f..f01e307ce 100644 --- a/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js +++ b/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js @@ -40,7 +40,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { },LABKEY.ext4.GRIDBUTTONS.DELETERECORD()], store: { type: 'array', - fields: ['genomeId', 'targetAssay','retainAmbiguousFeatures'] + fields: ['genomeId', 'targetAssay','maxAmbiguityAllowed'] }, columns: [{ dataIndex: 'genomeId', @@ -69,12 +69,13 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { allowBlank: false } },{ - dataIndex: 'retainAmbiguousFeatures', + dataIndex: 'maxAmbiguityAllowed', width: 175, - header: 'Retain Ambiguous Features?', + header: 'Max Ambiguity Allowed', editor: { - xtype: 'checkbox', - allowBlank: false + xtype: 'ldk-integerfield', + allowBlank: true, + minValue: 0 } }] }] @@ -86,7 +87,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { getValue: function(){ var ret = []; this.down('ldk-gridpanel').store.each(function(r, i) { - ret.push([r.data.genomeId, r.data.targetAssay, !!r.data.retainAmbiguousFeatures]); + ret.push([r.data.genomeId, r.data.targetAssay, r.data.maxAmbiguityAllowed ?? '']); }, this); return Ext4.isEmpty(ret) ? null : JSON.stringify(ret); @@ -122,7 +123,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { var rec = grid.store.createModel({ genomeId: row[0], targetAssay: row[1], - retainAmbiguousFeatures: !!row[2] + maxAmbiguityAllowed: row[2] }); grid.store.add(rec); }, this); diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java index 89317c083..b023676bb 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java @@ -32,6 +32,10 @@ public Provider() {{ put("allowBlank", false); }}, null), + SeuratToolParameter.create("maxAmbiguityAllowed", "Max Ambiguity Allowed", "If provided, ambiguous features with more than this number of values will be discarded (e.g. if maxAmbiguityAllowed=2, then the feature Feat1,Feat2,Feat3 would be discared, but not Feat1,Feat3. This can be overridden per genome.", "ldk-integerfield", new JSONObject() + {{ + put("minValue", 0); + }}, 0, null, true), SeuratToolParameter.create("ensureSamplesShareAllGenomes", "Ensure Samples Share All Genomes", "If checked, the job will fail unless nimble data is found for each requested genome for all samples", "checkbox", new JSONObject() {{ put("check", true); @@ -84,6 +88,8 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List Date: Tue, 12 Nov 2024 16:06:16 -0800 Subject: [PATCH 16/37] Bugfix file handling in SBT genotyping --- .../run/analysis/SequenceBasedTypingAnalysis.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAnalysis.java index 051a508f6..0e22b4c8a 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAnalysis.java @@ -204,7 +204,7 @@ public void exec(ResultSet rs) throws SQLException @Override public Output performAnalysisPerSampleLocal(AnalysisModel model, File inputBam, File referenceFasta, File outDir) throws PipelineJobException { - File expectedTxt = getSBTSummaryFile(outDir, inputBam); + File expectedTxt = getSBTSummaryFile(outDir, inputBam, false); if (expectedTxt.exists()) { getPipelineCtx().getLogger().info("Processing SBT output: " + expectedTxt.getPath()); @@ -310,9 +310,10 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc getPipelineCtx().getLogger().info("Inspection complete"); //write output as TSV - agg.writeTable(getSBTSummaryFile(outputDir, inputBam)); + agg.writeTable(getSBTSummaryFile(outputDir, inputBam, false)); - output.addSequenceOutput(sbtOutputLog, "SBT Results: " + inputBam.getName(), "SBT Results", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null); + // This will be gzipped later: + output.addSequenceOutput(getSBTSummaryFile(outputDir, inputBam, true), "SBT Results: " + inputBam.getName(), "SBT Results", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null); //optionally output FASTQ of unmapped reads Double exportThreshold = getProvider().getParameterByName(EXPORT_UNMAPPED).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class); @@ -375,9 +376,9 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc } } - protected File getSBTSummaryFile(File outputDir, File bam) + protected File getSBTSummaryFile(File outputDir, File bam, boolean doGzip) { - return new File(outputDir, FileUtil.getBaseName(bam) + ".sbt_hits.txt"); + return new File(outputDir, FileUtil.getBaseName(bam) + ".sbt_hits.txt" + (doGzip ? ".gz": "")); } public static class AlignmentGroupCompare From 8d23167c094a7d583c6f71f964af4f3a76f43a97 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 12 Nov 2024 19:08:51 -0800 Subject: [PATCH 17/37] Bugfix to nimble plot --- .../labkey/singlecell/run/NimbleHelper.java | 43 +++++++++++-------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index e41c317c3..7c2cfecb6 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -505,30 +505,37 @@ private Map doAlignment(List genomes, List plotArgs = new ArrayList<>(); - plotArgs.add("python3"); - plotArgs.add("-m"); - plotArgs.add("nimble"); + if (SequencePipelineService.get().hasMinLineCount(reportResultsGz, 2)) + { + // Also run nimble plot. Always re-run since this is fast: + List plotArgs = new ArrayList<>(); + plotArgs.add("python3"); + plotArgs.add("-m"); + plotArgs.add("nimble"); - plotArgs.add("plot"); - plotArgs.add("--input_file"); - plotArgs.add("/work/" + reportResultsGz.getName()); + plotArgs.add("plot"); + plotArgs.add("--input_file"); + plotArgs.add("/work/" + reportResultsGz.getName()); - File plotResultsHtml = getReportHtmlFileFromResults(reportResultsGz); - if (reportResultsGz.exists()) - { - plotResultsHtml.delete(); - } + File plotResultsHtml = getReportHtmlFileFromResults(reportResultsGz); + if (reportResultsGz.exists()) + { + plotResultsHtml.delete(); + } - plotArgs.add("--output_file"); - plotArgs.add("/work/" + plotResultsHtml.getName()); + plotArgs.add("--output_file"); + plotArgs.add("/work/" + plotResultsHtml.getName()); - runUsingDocker(plotArgs, output, null); + runUsingDocker(plotArgs, output, null); - if (!plotResultsHtml.exists()) + if (!plotResultsHtml.exists()) + { + throw new PipelineJobException("Missing file: " + plotResultsHtml.getPath()); + } + } + else { - throw new PipelineJobException("Missing file: " + plotResultsHtml.getPath()); + getPipelineCtx().getLogger().info("Only single line found in results, skipping nimble plot"); } } From 852a1d869d0ccc6567b6f0917da78ed9a46972ac Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 13 Nov 2024 09:10:35 -0800 Subject: [PATCH 18/37] Nimble plot uses aligner output --- singlecell/src/org/labkey/singlecell/run/NimbleHelper.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index 7c2cfecb6..3a65fc8d9 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -505,7 +505,7 @@ private Map doAlignment(List genomes, List plotArgs = new ArrayList<>(); @@ -515,7 +515,7 @@ private Map doAlignment(List genomes, List Date: Wed, 13 Nov 2024 14:30:40 -0800 Subject: [PATCH 19/37] Improve verbosity for UpdateReadsetFilesHandler --- .../analysis/UpdateReadsetFilesHandler.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java index 06826bc36..2b5abaa76 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java @@ -13,6 +13,7 @@ import htsjdk.variant.vcf.VCFReader; import org.apache.commons.io.FileUtils; import org.apache.logging.log4j.Logger; +import org.jetbrains.annotations.Nullable; import org.json.JSONObject; import org.labkey.api.module.ModuleLoader; import org.labkey.api.pipeline.PipelineJob; @@ -36,7 +37,6 @@ import java.io.File; import java.io.IOException; import java.io.PrintWriter; -import java.nio.file.StandardCopyOption; import java.nio.file.StandardOpenOption; import java.util.ArrayList; import java.util.Arrays; @@ -148,7 +148,7 @@ private SAMFileHeader getAndValidateHeaderForBam(SequenceOutputFile so, String n throw new PipelineJobException("Sample and library names match in read group(s), aborting"); } - log.info("Readset name and header do not match: " + newRsName + " / " + distinctLibraries.stream().distinct().collect(Collectors.joining())); + log.info("Readset name and header do not match: " + newRsName + " / existing library: " + distinctLibraries.stream().distinct().collect(Collectors.joining()) + ", existing sample: " + distinctSamples.stream().distinct().collect(Collectors.joining())); return header; } @@ -261,7 +261,7 @@ private void reheaderVcf(SequenceOutputFile so, JobContext ctx, String newRsName } FileUtils.moveFile(outputIdx, inputIndex); - addTracker(so, existingSample, newRsName); + addTracker(so, existingSample, newRsName, null); } catch (IOException e) { @@ -269,7 +269,7 @@ private void reheaderVcf(SequenceOutputFile so, JobContext ctx, String newRsName } } - private void addTracker(SequenceOutputFile so, String existingSample, String newRsName) throws IOException + private void addTracker(SequenceOutputFile so, String existingSample, String newRsName, @Nullable String existingLibrary) throws IOException { File tracker = new File(so.getFile().getParentFile(), "reheaderHistory.txt"); boolean preExisting = tracker.exists(); @@ -282,10 +282,10 @@ private void addTracker(SequenceOutputFile so, String existingSample, String new { if (!preExisting) { - writer.println("OriginalSample\tNewSample"); + writer.println("OriginalSample\tNewSample\tOriginalLibrary"); } - writer.println(existingSample + "\t" + newRsName); + writer.println(existingSample + "\t" + newRsName + "\t" + (existingLibrary == null ? "N/A" : existingLibrary)); } } @@ -345,7 +345,7 @@ private void reheaderBamOrCram(SequenceOutputFile so, JobContext ctx, String new } FileUtils.moveFile(outputIdx, inputIndex); - addTracker(so, existingSample, newRsName); + addTracker(so, existingSample, newRsName, existingLibrary); } catch (IOException e) { From f775096218a5e897120d4651d2ebe90d3cae5ec2 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 14 Nov 2024 12:15:06 -0800 Subject: [PATCH 20/37] Update tool versions --- .../pipeline_code/extra_tools_install.sh | 360 +++++------------- .../pipeline_code/sequence_tools_install.sh | 100 +++-- 2 files changed, 170 insertions(+), 290 deletions(-) diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index d7162cdfe..a04deedb8 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -1,37 +1,5 @@ #!/bin/bash -# -# -# Copyright (c) 2012 LabKey Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# This script is designed to assist with the initial installation of the external informatics tools -# used by the SequenceAnalysis module pipeline. It is a fairly utilitarian script, which has undergone only -# very limited testing. It is designed to provide a template to assist with the install, not a one-click installer. -# This script was written for RHEL6, and has been adapted for Ubuntu. -# -# Prior to using this script, you should run and configure CPAN and the package manager for your OS (ie. yum or apt-get). -# This script is designed to be run as root, using a command similar to: -# -# bash sequence_tools_install.sh -d /usr/local/labkey/ | tee sequence_tools_install.log -# -# NOTE: this script will delete any previously downloaded versions of these tools, assuming they were downloaded to the location -# expected by this script. This is deliberate so that the script can be re-run to perform incremental upgrades of these tools. -# -# -# Variables -# + set -e set -u FORCE_REINSTALL= @@ -85,287 +53,165 @@ echo "LKTOOLS_DIR: $LKTOOLS_DIR" echo "LKSRC_DIR: $LKSRC_DIR" -# -#MORGAN -# echo "" echo "" echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" -echo "Install MORGAN" +echo "Install plink2" echo "" cd $LKSRC_DIR -if [[ ! -e ${LKTOOLS_DIR}/MORGAN || ! -z $FORCE_REINSTALL ]]; +if [[ ! -e ${LKTOOLS_DIR}/plink2 || ! -z $FORCE_REINSTALL ]]; then echo "Cleaning up previous installs" - rm -Rf morgan32_release* - rm -Rf morgan_v3* - rm -Rf MORGAN_V* - rm -Rf $LKTOOLS_DIR/MORGAN - - wget http://faculty.washington.edu/eathomp/Anonftp/PANGAEA/MORGAN/morgan_v332_release.tar.gz - gunzip morgan_v332_release.tar.gz - tar -xf morgan_v332_release.tar - echo "Compressing TAR" - gzip morgan_v332_release.tar - cd MORGAN_V332_Release - make morgan + rm -Rf plink2* + rm -Rf $LKTOOLS_DIR/plink2* - cd ../ - cp -R ./MORGAN_V332_Release $LKTOOLS_DIR/MORGAN + wget https://s3.amazonaws.com/plink2-assets/alpha6/plink2_linux_avx2_20241111.zip + unzip plink2_linux_avx2_20241111.zip + + install ./plink2 $LKTOOLS_DIR/plink2 else echo "Already installed" fi +echo "" +echo "" +echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" +echo "Install minimap2" +echo "" +cd $LKSRC_DIR + +if [[ ! -e ${LKTOOLS_DIR}/minimap2 || ! -z $FORCE_REINSTALL ]]; +then + echo "Cleaning up previous installs" + rm -Rf minimap2* + rm -Rf $LKTOOLS_DIR/minimap2* + + wget https://github.com/lh3/minimap2/releases/download/v2.28/minimap2-2.28.tar.bz2 + bunzip2 minimap2-2.28.tar.bz2 + tar -xf minimap2-2.28.tar + + cd minimap2-2.28 + make + + install minimap2 $LKTOOLS_DIR/ +else + echo "Already installed" +fi -# -#GIGI -# echo "" echo "" echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" -echo "Install GIGI" +echo "Install cellsnp-lite" echo "" cd $LKSRC_DIR -if [[ ! -e ${LKTOOLS_DIR}/GIGI || ! -z $FORCE_REINSTALL ]]; +if [[ ! -e ${LKTOOLS_DIR}/cellsnp-lite || ! -z $FORCE_REINSTALL ]]; then echo "Cleaning up previous installs" - rm -Rf GIGI_v1.06.1* - rm -Rf $LKTOOLS_DIR/GIGI - rm -Rf __MACOSX* + rm -Rf cellsnp-lite* + rm -Rf $LKTOOLS_DIR/cellsnp-lite* - wget https://faculty.washington.edu/wijsman/progdists/gigi/software/GIGI/GIGI_v1.06.1.zip - unzip GIGI_v1.06.1.zip - cd GIGI_v1.06.1 + git clone https://github.com/single-cell-genetics/cellsnp-lite.git + cd cellsnp-lite + autoreconf -iv + ./configure --with-htslib=${LKTOOLS_DIR}/lib make - cd ../ - install ./GIGI_v1.06.1/GIGI $LKTOOLS_DIR/GIGI + install cellsnp-lite $LKTOOLS_DIR/ +else + echo "Already installed" +fi + + +echo "" +echo "" +echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" +echo "Install sratoolkit" +echo "" +cd $LKSRC_DIR + +if [[ ! -e ${LKTOOLS_DIR}/fasterq-dump || ! -z $FORCE_REINSTALL ]]; +then + echo "Cleaning up previous installs" + rm -Rf sratoolkit* + rm -Rf $LKTOOLS_DIR/sratoolkit* + rm -Rf $LKTOOLS_DIR/fasterq-dump* + + wget https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/current/sratoolkit.current-centos_linux64.tar.gz + tar -xf sratoolkit.current-centos_linux64.tar.gz + cp -R sratoolkit.3.1.1-centos_linux64 $LKTOOLS_DIR + ln -s ${LKTOOLS_DIR}/sratoolkit.3.1.1-centos_linux64/bin/fasterq-dump ${LKTOOLS_DIR}/fasterq-dump else echo "Already installed" fi -# -# PARalyzer -# echo "" echo "" echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" -echo "Install PARalyzer" +echo "Install gffread" echo "" cd $LKSRC_DIR -if [[ ! -e ${LKTOOLS_DIR}/PARalyzer || ! -z $FORCE_REINSTALL ]]; +if [[ ! -e ${LKTOOLS_DIR}/gffread || ! -z $FORCE_REINSTALL ]]; then echo "Cleaning up previous installs" - rm -Rf PARalyzer_v1_5* - rm -Rf $LKTOOLS_DIR/PARalyzer + rm -Rf gffread* + rm -Rf $LKTOOLS_DIR/gffread* - wget --no-check-certificate https://ohlerlab.mdc-berlin.de/files/duke/PARalyzer/PARalyzer_v1_5.tar.gz - gunzip PARalyzer_v1_5.tar.gz - tar -xf PARalyzer_v1_5.tar - gzip PARalyzer_v1_5.tar + wget https://github.com/gpertea/gffread/releases/download/v0.12.7/gffread-0.12.7.Linux_x86_64.tar.gz + tar -xf gffread-0.12.7.Linux_x86_64.tar.gz - install ./PARalyzer_v1_5/PARalyzer $LKTOOLS_DIR/PARalyzer + install ./gffread-0.12.7.Linux_x86_64/gffread $LKTOOLS_DIR/ else echo "Already installed" fi -# -# bwa -# echo "" echo "" echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" -echo "Install miRDeep2" +echo "Install paragraph" echo "" cd $LKSRC_DIR -if [[ ! -e ${LKTOOLS_DIR}/miRDeep2 || ! -z $FORCE_REINSTALL ]]; +if [[ ! -e ${LKTOOLS_DIR}/paragraph || ! -z $FORCE_REINSTALL ]]; then echo "Cleaning up previous installs" - rm -Rf mirdeep2_0_0_7* - rm -Rf $LKTOOLS_DIR/miRDeep2.pl + rm -Rf paragraph* + rm -Rf $LKTOOLS_DIR/paragraph* - wget --no-check-certificate https://www.mdc-berlin.de/43969303/en/research/research_teams/systems_biology_of_gene_regulatory_elements/projects/miRDeep/mirdeep2_0_0_7.zip - unzip mirdeep2_0_0_7.zip + mkdir paragraph + cd paragraph + wget https://github.com/Illumina/paragraph/releases/download/v2.4a/paragraph-v2.4a-binary.zip + unzip paragraph-v2.4a-binary.zip + rm paragraph-v2.4a-binary.zip - cp -R ./mirdeep2_0_0_7 $LKTOOLS_DIR/miRDeep2 + cd ../ + cp -R paragraph $LKTOOLS_DIR + ln -s ${LKTOOLS_DIR}/paragraph/bin/paragraph ${LKTOOLS_DIR}/paragraph + ln -s ${LKTOOLS_DIR}/paragraph/bin/multigrmpy.py ${LKTOOLS_DIR}/multigrmpy.py else echo "Already installed" fi -## -##Mira -## -#echo "" -#echo "" -#echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" -#echo "Install Mira Assembler" -#echo "" -#cd $LKSRC_DIR -# -#if [[ ! -e ${LKTOOLS_DIR}/mira || ! -z $FORCE_REINSTALL ]]; -#then -# echo "Cleaning up previous installs" -# rm -Rf mira_4.0rc4_linux-gnu_x86_64* -# rm -Rf mira_4.0.2_linux-gnu_x86_64* -# rm -Rf mira-4.0* -# -# rm -Rf $LKTOOLS_DIR/mira -# rm -Rf $LKTOOLS_DIR/miraconvert -# -# wget http://downloads.sourceforge.net/project/mira-assembler/MIRA/stable/mira_4.0.2_linux-gnu_x86_64_static.tar.bz2 -# bunzip2 mira_4.0.2_linux-gnu_x86_64_static.tar.bz2 -# tar -xf mira_4.0.2_linux-gnu_x86_64_static.tar -# echo "Compressing TAR" -# bzip2 mira_4.0.2_linux-gnu_x86_64_static.tar -# cd mira_4.0.2_linux-gnu_x86_64_static -# -# cd $LKTOOLS_DIR -# ln -s ./src/mira_4.0.2_linux-gnu_x86_64_static/bin/mira mira -# ln -s ./src/mira_4.0.2_linux-gnu_x86_64_static/bin/miraconvert miraconvert -#else -# echo "Already installed" -#fi - -## -##velvet -## -# -#echo "" -#echo "" -#echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" -#echo "Install velvet" -#echo "" -#cd $LKSRC_DIR -# -#if [[ ! -e ${LKTOOLS_DIR}/velvetg || ! -z $FORCE_REINSTALL ]]; -#then -# echo "Cleaning up previous installs" -# rm -Rf velvet_1.2.09.tgz -# rm -Rf velvet_1.2.09.tar.gz -# rm -Rf velvet_1.2.09.tar -# rm -Rf velvet_1.2.09 -# rm -Rf $LKTOOLS_DIR/velvetg -# rm -Rf $LKTOOLS_DIR/velveth -# -# wget http://www.ebi.ac.uk/~zerbino/velvet/velvet_1.2.09.tgz -# gunzip velvet_1.2.09.tgz -# tar -xf velvet_1.2.09.tar -# echo "Compressing TAR" -# gzip velvet_1.2.09.tar -# cd velvet_1.2.09 -# make OPENMP=1 LONGSEQUENCES=1 -# -# cd $LKTOOLS_DIR -# ln -s ./src/velvet_1.2.09/velvetg velvetg -# ln -s ./src/velvet_1.2.09/velveth velveth -#else -# echo "Already installed" -#fi - -## -##VelvetOptimiser -## -# -#echo "" -#echo "" -#echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" -#echo "Installing VelvetOptimiser" -#echo "" -#cd $LKSRC_DIR -# -#if [[ ! -e ${LKTOOLS_DIR}/VelvetOptimiser.pl || ! -z $FORCE_REINSTALL ]]; -#then -# rm -Rf VelvetOptimiser-2.2.5.tar.gz -# rm -Rf VelvetOptimiser-2.2.5.tar -# rm -Rf VelvetOptimiser-2.2.5 -# rm -Rf $LKTOOLS_DIR/VelvetOptimiser.pl -# -# wget http://www.vicbioinformatics.com/VelvetOptimiser-2.2.5.tar.gz -# gunzip VelvetOptimiser-2.2.5.tar.gz -# tar -xf VelvetOptimiser-2.2.5.tar -# gzip VelvetOptimiser-2.2.5.tar -# cd VelvetOptimiser-2.2.5 -# -# cd $LKTOOLS_DIR -# ln -s ./src/VelvetOptimiser-2.2.5/VelvetOptimiser.pl VelvetOptimiser.pl -#else -# echo "Already installed" -#fi - -## -##AMOS -## -# -#echo "" -#echo "" -#echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" -#echo "Installing AMOS" -#echo "" -#cd $LKSRC_DIR -# -#if [[ ! -e ${LKTOOLS_DIR}/bank-transact || ! -z $FORCE_REINSTALL ]]; -#then -# rm -Rf amos-3.1.0.tar.gz -# rm -Rf amos-3.1.0.tar -# rm -Rf amos-3.1.0 -# rm -Rf $LKTOOLS_DIR/bank2fasta -# rm -Rf $LKTOOLS_DIR/bank2contig -# rm -Rf $LKTOOLS_DIR/bank-transact -# -# wget http://downloads.sourceforge.net/project/amos/amos/3.1.0/amos-3.1.0.tar.gz -# gunzip amos-3.1.0.tar.gz -# tar -xf amos-3.1.0.tar -# cd amos-3.1.0 -# ./configure -# make -# make install -# -# cd $LKTOOLS_DIR -# ln -s ./src/amos-3.1.0/bin/bank2fasta bank2fasta -# ln -s ./src/amos-3.1.0/bin/bank2contig bank2contig -# ln -s ./src/amos-3.1.0/bin/bank-transact bank-transact -#else -# echo "Already installed" -#fi - -# -# htseq -# -#echo "" -#echo "" -#echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" -#echo "Install htseq" -#echo "" -#cd $LKSRC_DIR -# -#if [[ ! -e ${LKTOOLS_DIR}/htseq || ! -z $FORCE_REINSTALL ]]; -#then -# echo "Cleaning up previous installs" -# rm -Rf STAR_2.4* -# rm -Rf $LKTOOLS_DIR/STAR -# -# if [ -n $SKIP_PACKAGE_MANAGER ]; then -# echo "Skipping package install" -# elif [ $(which apt-get) ]; then -# apt-get install build-essential python2.7-dev python-numpy python-matplotlib -# elif [ $(which yum) ]; then -# yum install python-devel numpy python-matplotlib -# fi -# -# wget https://pypi.python.org/packages/source/H/HTSeq/HTSeq-0.6.1.tar.gz -# gunzip HTSeq-0.6.1.tar.gz -# tar -xf HTSeq-0.6.1.tar -# gzip HTSeq-0.6.1.tar -# -# cd HTSeq-0.6.1 -# python setup.py install --user -#else -# echo "Already installed" -#fi +echo "" +echo "" +echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" +echo "Install sniffles2" +echo "" +cd $LKSRC_DIR +if [[ ! -e ${LKTOOLS_DIR}/sniffles || ! -z $FORCE_REINSTALL ]]; +then + echo "Cleaning up previous installs" + rm -Rf sniffles* + rm -Rf $LKTOOLS_DIR/sniffles* + + module load python/3.11.7 + python -m ensurepip --upgrade + python -m pip install --force-reinstall --target ${LKTOOLS_DIR}/pythonPackages git+https://github.com/fritzsedlazeck/Sniffles.git +else + echo "Already installed" +fi diff --git a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh index 12d7321cb..9932d7393 100755 --- a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh @@ -146,18 +146,49 @@ if [[ ! -e ${LKTOOLS_DIR}/bwa || ! -z $FORCE_REINSTALL ]]; then echo "Cleaning up previous installs" rm -Rf bwa-0.* + rm -Rf bwa.zip + rm -Rf v0.7.18* rm -Rf $LKTOOLS_DIR/bwa - wget $WGET_OPTS -O bwa.zip https://github.com/lh3/bwa/zipball/master/ - unzip bwa.zip - DIRNAME=`ls | grep lh3-bwa` - cd $DIRNAME + wget $WGET_OPTS https://github.com/lh3/bwa/archive/refs/tags/v0.7.18.tar.gz + tar -xf v0.7.18.tar.gz + + cd bwa-0.7.18 make install bwa $LKTOOLS_DIR/ else echo "Already installed" fi + +# +# bwa-mem2 +# +echo "" +echo "" +echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" +echo "Install BWA-mem2" +echo "" +cd $LKSRC_DIR + +if [[ ! -e ${LKTOOLS_DIR}/bwa-mem2 || ! -z $FORCE_REINSTALL ]]; +then + echo "Cleaning up previous installs" + rm -Rf bwa-mem2* + rm -Rf $LKTOOLS_DIR/bwa-mem2* + + wget $WGET_OPTS https://github.com/bwa-mem2/bwa-mem2/releases/download/v2.2.1/bwa-mem2-2.2.1_x64-linux.tar.bz2 + bunzip2 bwa-mem2-2.2.1_x64-linux.tar.bz2 + tar -xf bwa-mem2-2.2.1_x64-linux.tar + + # NOTE: all executables are needed: + install bwa-mem2-2.2.1_x64-linux/bwa-mem2* $LKTOOLS_DIR/ +else + echo "Already installed" +fi + + + # # gffread # @@ -274,10 +305,10 @@ then rm -Rf gatk-4* rm -Rf $LKTOOLS_DIR/GenomeAnalysisTK4.jar - wget $WGET_OPTS https://github.com/broadinstitute/gatk/releases/download/4.4.0.0/gatk-4.4.0.0.zip - unzip gatk-4.4.0.0.zip + wget $WGET_OPTS https://github.com/broadinstitute/gatk/releases/download/4.6.1.0/gatk-4.6.1.0.zip + unzip gatk-4.6.1.0.zip - cp ./gatk-4.4.0.0/gatk-package-4.4.0.0-local.jar $LKTOOLS_DIR/GenomeAnalysisTK4.jar + cp ./gatk-4.6.1.0/gatk-package-4.6.1.0-local.jar $LKTOOLS_DIR/GenomeAnalysisTK4.jar else echo "Already installed" fi @@ -298,14 +329,13 @@ then echo "Cleaning up previous installs" rm -Rf STAR_2.* rm -Rf $LKTOOLS_DIR/STAR + rm -Rf $LKTOOLS_DIR/STARlong - wget $WGET_OPTS https://github.com/alexdobin/STAR/archive/2.7.10b.tar.gz - gunzip 2.7.10b.tar.gz - tar -xf 2.7.10b.tar - gzip 2.7.10b.tar + wget $WGET_OPTS https://github.com/alexdobin/STAR/releases/download/2.7.11b/STAR_2.7.11b.zip + unzip STAR_2.7.11b.zip - install ./STAR-2.7.10b/bin/Linux_x86_64_static/STAR $LKTOOLS_DIR/STAR - install ./STAR-2.7.10b/bin/Linux_x86_64_static/STARlong $LKTOOLS_DIR/STARlong + install ./STAR_2.7.11b/Linux_x86_64/STAR $LKTOOLS_DIR/STAR + install ./STAR_2.7.11b/Linux_x86_64/STARlong $LKTOOLS_DIR/STARlong else echo "Already installed" fi @@ -433,12 +463,13 @@ then rm -Rf $LKTOOLS_DIR/samtools rm -Rf $LKTOOLS_DIR/bcftools - wget $WGET_OPTS https://github.com/samtools/samtools/releases/download/1.20/samtools-1.20.tar.bz2 - bunzip2 samtools-1.20.tar.bz2 - tar -xf samtools-1.20.tar + ST_VERSION=1.21 + wget $WGET_OPTS https://github.com/samtools/samtools/releases/download/${ST_VERSION}/samtools-${ST_VERSION}.tar.bz2 + bunzip2 samtools-${ST_VERSION}.tar.bz2 + tar -xf samtools-${ST_VERSION}.tar echo "Compressing TAR" - bzip2 samtools-1.20.tar - cd samtools-1.20 + bzip2 samtools-${ST_VERSION}.tar + cd samtools-${ST_VERSION} ./configure make install ./samtools ${LKTOOLS_DIR}/samtools @@ -462,10 +493,11 @@ then rm -Rf bcftools* rm -Rf $LKTOOLS_DIR/bcftools - wget $WGET_OPTS https://github.com/samtools/bcftools/releases/download/1.20/bcftools-1.20.tar.bz2 - tar xjvf bcftools-1.20.tar.bz2 - chmod 755 bcftools-1.20 - cd bcftools-1.20 + ST_VERSION=1.21 + wget $WGET_OPTS https://github.com/samtools/bcftools/releases/download/${ST_VERSION}/bcftools-${ST_VERSION}.tar.bz2 + tar xjvf bcftools-${ST_VERSION}.tar.bz2 + chmod 755 bcftools-${ST_VERSION} + cd bcftools-${ST_VERSION} rm -f plugins/liftover.c wget $WGET_OPTS -P plugins https://raw.githubusercontent.com/freeseek/score/master/liftover.c @@ -495,15 +527,17 @@ then rm -Rf $LKTOOLS_DIR/tabix rm -Rf $LKTOOLS_DIR/bgzip - wget $WGET_OPTS https://github.com/samtools/htslib/releases/download/1.20/htslib-1.20.tar.bz2 - bunzip2 htslib-1.20.tar.bz2 - tar -xf htslib-1.20.tar + ST_VERSION=1.21 + wget $WGET_OPTS https://github.com/samtools/htslib/releases/download/${ST_VERSION}/htslib-${ST_VERSION}.tar.bz2 + bunzip2 htslib-${ST_VERSION}.tar.bz2 + tar -xf htslib-${ST_VERSION}.tar echo "Compressing TAR" - bzip2 htslib-1.20.tar - chmod 755 htslib-1.20 - cd htslib-1.20 - ./configure + bzip2 htslib-${ST_VERSION}.tar + chmod 755 htslib-${ST_VERSION} + cd htslib-${ST_VERSION} + ./configure --prefix=${LKTOOLS_DIR}/lib make + make install install ./tabix $LKTOOLS_DIR install ./bgzip $LKTOOLS_DIR @@ -528,7 +562,7 @@ then rm -Rf bedtools* rm -Rf $LKTOOLS_DIR/bedtools - wget -O bedtools $WGET_OPTS https://github.com/arq5x/bedtools2/releases/download/v2.30.0/bedtools.static.binary + wget -O bedtools $WGET_OPTS https://github.com/arq5x/bedtools2/releases/download/v2.31.0/bedtools.static chmod +x bedtools install ./bedtools ${LKTOOLS_DIR}/bedtools @@ -645,7 +679,7 @@ then rm -Rf $LKTOOLS_DIR/htsjdk-* rm -Rf $LKTOOLS_DIR/libIntelDeflater.so - wget $WGET_OPTS https://github.com/broadinstitute/picard/releases/download/3.0.0/picard.jar + wget $WGET_OPTS https://github.com/broadinstitute/picard/releases/download/3.3.0/picard.jar cp -R ./picard.jar $LKTOOLS_DIR/ else @@ -1063,8 +1097,8 @@ cd $LKSRC_DIR if [[ ! -e ${LKTOOLS_DIR}/lofreq || ! -z $FORCE_REINSTALL ]]; then - rm -Rf lofreq_star* - rm -Rf $LKTOOLS_DIR/lofreq_star* + rm -Rf lofreq* + rm -Rf $LKTOOLS_DIR/lofreq* wget $WGET_OPTS https://github.com/CSB5/lofreq/raw/master/dist/lofreq_star-2.1.4_linux-x86-64.tgz tar -xf lofreq_star-2.1.4_linux-x86-64.tgz From 2adfa0ed4bcdac7bf878f06005fe91be50d79967 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 15 Nov 2024 11:55:10 -0800 Subject: [PATCH 21/37] Only throw error for missing nimble report if the alignment generated results --- .../src/org/labkey/singlecell/run/NimbleHelper.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index 3a65fc8d9..9f0ff010f 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -293,9 +293,15 @@ public void doNimbleAlign(File bam, PipelineStepOutput output, Readset rs, Strin File reportHtml = getReportHtmlFileFromResults(results); if (!reportHtml.exists()) { - throw new PipelineJobException("Unable to find file: " + reportHtml.getPath()); + if (SequencePipelineService.get().hasMinLineCount(results, 2)) + { + throw new PipelineJobException("Unable to find file: " + reportHtml.getPath()); + } + } + else + { + output.addSequenceOutput(results, basename + ": nimble report", "Nimble Report", rs.getRowId(), null, genome.getGenomeId(), description); } - output.addSequenceOutput(results, basename + ": nimble report", "Nimble Report", rs.getRowId(), null, genome.getGenomeId(), description); File outputBam = new File(results.getPath().replaceAll("results." + genome.genomeId + ".txt.gz", "nimbleAlignment." + genome.genomeId + ".bam")); if (outputBam.exists()) From 91076c969635a13010c60ba1321a8b7cdb845a3c Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 15 Nov 2024 12:14:50 -0800 Subject: [PATCH 22/37] Always use txt.gz file for SBT --- ...equenceBasedTypingAlignmentAggregator.java | 41 +++---------------- .../analysis/SequenceBasedTypingAnalysis.java | 16 +++----- 2 files changed, 11 insertions(+), 46 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAlignmentAggregator.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAlignmentAggregator.java index 33c559daf..d2273f700 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAlignmentAggregator.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAlignmentAggregator.java @@ -17,24 +17,19 @@ import au.com.bytecode.opencsv.CSVReader; import au.com.bytecode.opencsv.CSVWriter; -import htsjdk.samtools.SAMFormatException; import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMRecordIterator; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SamReaderFactory; -import htsjdk.samtools.ValidationStringency; import htsjdk.samtools.fastq.FastqReader; import htsjdk.samtools.fastq.FastqRecord; import htsjdk.samtools.fastq.FastqWriter; import htsjdk.samtools.fastq.FastqWriterFactory; import htsjdk.samtools.reference.IndexedFastaSequenceFile; import htsjdk.samtools.reference.ReferenceSequence; +import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Interval; import htsjdk.samtools.util.IntervalList; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Logger; -import org.apache.logging.log4j.LogManager; import org.labkey.api.data.Container; import org.labkey.api.data.DbScope; import org.labkey.api.data.SimpleFilter; @@ -46,12 +41,9 @@ import org.labkey.api.reader.Readers; import org.labkey.api.security.User; import org.labkey.api.sequenceanalysis.model.AnalysisModel; -import org.labkey.api.util.FileType; import org.labkey.api.util.Pair; -import org.labkey.api.util.StringUtilsLabKey; import org.labkey.api.writer.PrintWriters; import org.labkey.sequenceanalysis.SequenceAnalysisSchema; -import org.labkey.sequenceanalysis.api.picard.CigarPositionIterable; import org.labkey.sequenceanalysis.run.alignment.FastqCollapser; import org.labkey.sequenceanalysis.run.util.FlashWrapper; import org.labkey.sequenceanalysis.run.util.NTSnp; @@ -59,16 +51,9 @@ import org.labkey.sequenceanalysis.util.SequenceUtil; import java.io.BufferedReader; -import java.io.BufferedWriter; import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStream; -import java.io.OutputStreamWriter; import java.io.PrintWriter; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -81,7 +66,6 @@ import java.util.Map; import java.util.Set; import java.util.TreeSet; -import java.util.zip.GZIPOutputStream; /** * User: bimber @@ -303,22 +287,9 @@ public String getKey(SAMRecord record) ; } - public OutputStream getLogOutputStream(File outputLog) throws IOException - { - FileType gz = new FileType(".gz"); - if (gz.isType(outputLog)) - { - return new GZIPOutputStream(new FileOutputStream(outputLog)); - } - else - { - return new FileOutputStream(outputLog); - } - } - public Map getAlignmentSummary(File outputLog) throws IOException, PipelineJobException { - try (CSVWriter writer = outputLog == null ? null : new CSVWriter(new BufferedWriter(new OutputStreamWriter(getLogOutputStream(outputLog), StandardCharsets.UTF_8)), '\t', CSVWriter.NO_QUOTE_CHARACTER)) + try (CSVWriter writer = outputLog == null ? null : new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(outputLog), '\t', CSVWriter.NO_QUOTE_CHARACTER)) { //these are stage-1 filters, filtering on the read-pair level Map totals = doFilterStage1(writer); @@ -899,7 +870,7 @@ private Map doFilterStage4(CSVWriter writer, Map return stage4Totals; } - private class HitSet + private static class HitSet { public Set readNames = new HashSet<>(); public Set refNames = new TreeSet<>(); @@ -1047,7 +1018,7 @@ public void writeOutput(User u, Container c, AnalysisModel model) public static void processSBTSummary(User u, Container c, AnalysisModel model, File output, File refFasta, Logger log) throws PipelineJobException { - try (CSVReader reader = new CSVReader(new BufferedReader(new InputStreamReader(new FileInputStream(output), StandardCharsets.UTF_8)), '\t', CSVWriter.DEFAULT_QUOTE_CHARACTER)) + try (CSVReader reader = new CSVReader(IOUtil.openFileForBufferedUtf8Reading(output), '\t', CSVWriter.DEFAULT_QUOTE_CHARACTER)) { try (DbScope.Transaction transaction = ExperimentService.get().ensureTransaction()) { @@ -1117,7 +1088,7 @@ public static void processSBTSummary(User u, Container c, AnalysisModel model, F public void writeTable(File output) throws PipelineJobException { - try (CSVWriter writer = new CSVWriter(PrintWriters.getPrintWriter(output), '\t')) + try (CSVWriter writer = new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(output), '\t')) { Map map = writeSummary(); @@ -1326,7 +1297,7 @@ else if (f.getName().contains("_2")) //rename reads to make it easier to combine later File renamed = new File(outDir, basename + ".collapsed.tmp.fasta"); - try (BufferedReader reader = Readers.getReader(collapsed);PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(renamed), StringUtilsLabKey.DEFAULT_CHARSET)))) + try (BufferedReader reader = Readers.getReader(collapsed);PrintWriter writer = new PrintWriter(IOUtil.openFileForBufferedUtf8Writing(renamed))) { String line; while ((line = reader.readLine()) != null) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAnalysis.java index 0e22b4c8a..3ad16b82a 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAnalysis.java @@ -204,19 +204,13 @@ public void exec(ResultSet rs) throws SQLException @Override public Output performAnalysisPerSampleLocal(AnalysisModel model, File inputBam, File referenceFasta, File outDir) throws PipelineJobException { - File expectedTxt = getSBTSummaryFile(outDir, inputBam, false); + File expectedTxt = getSBTSummaryFile(outDir, inputBam); if (expectedTxt.exists()) { getPipelineCtx().getLogger().info("Processing SBT output: " + expectedTxt.getPath()); SequenceBasedTypingAlignmentAggregator.processSBTSummary(getPipelineCtx().getJob().getUser(), getPipelineCtx().getJob().getContainer(), model, expectedTxt, referenceFasta, getPipelineCtx().getLogger()); - File compressed = Compress.compressGzip(expectedTxt); - if (compressed.exists() && expectedTxt.exists()) - { - expectedTxt.delete(); - } - // Perform second pass to collapse groups: new AlignmentGroupCompare(model.getAnalysisId(), getPipelineCtx().getJob().getContainer(), getPipelineCtx().getJob().getUser()).collapseGroups(getPipelineCtx().getLogger(), getPipelineCtx().getJob().getUser()); } @@ -310,10 +304,10 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc getPipelineCtx().getLogger().info("Inspection complete"); //write output as TSV - agg.writeTable(getSBTSummaryFile(outputDir, inputBam, false)); + agg.writeTable(getSBTSummaryFile(outputDir, inputBam)); // This will be gzipped later: - output.addSequenceOutput(getSBTSummaryFile(outputDir, inputBam, true), "SBT Results: " + inputBam.getName(), "SBT Results", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null); + output.addSequenceOutput(getSBTSummaryFile(outputDir, inputBam), "SBT Results: " + inputBam.getName(), "SBT Results", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null); //optionally output FASTQ of unmapped reads Double exportThreshold = getProvider().getParameterByName(EXPORT_UNMAPPED).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class); @@ -376,9 +370,9 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc } } - protected File getSBTSummaryFile(File outputDir, File bam, boolean doGzip) + protected File getSBTSummaryFile(File outputDir, File bam) { - return new File(outputDir, FileUtil.getBaseName(bam) + ".sbt_hits.txt" + (doGzip ? ".gz": "")); + return new File(outputDir, FileUtil.getBaseName(bam) + ".sbt_hits.txt.gz"); } public static class AlignmentGroupCompare From 56122dd24716c1268e3379bfdb69eb5bbece8513 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 15 Nov 2024 15:46:59 -0800 Subject: [PATCH 23/37] Switch docker to conditionally mount volumes based on provider --- .../pipeline/JobResourceSettings.java | 3 + .../pipeline/PipelineContext.java | 4 +- .../pipeline/SequencePipelineService.java | 4 + .../sequenceanalysis/run/DockerWrapper.java | 6 +- .../SequenceAnalysisModule.java | 68 ++++++++++++++- .../SequencePipelineServiceImpl.java | 25 ++++++ .../analysis/GLNexusHandler.java | 5 +- .../pipeline/JobContextImpl.java | 6 ++ .../pipeline/SequenceJob.java | 14 ++++ .../pipeline/SequenceTaskHelper.java | 6 ++ .../run/alignment/BWAMem2Wrapper.java | 83 +++++++++++++++++++ .../run/alignment/ParagraphStep.java | 7 +- .../run/analysis/DeepVariantAnalysis.java | 5 +- .../run/analysis/LofreqAnalysis.java | 4 +- .../run/analysis/NextCladeHandler.java | 6 +- .../run/analysis/PangolinHandler.java | 10 ++- .../AbstractSingleCellPipelineStep.java | 1 + .../singlecell/CellHashingServiceImpl.java | 6 +- .../labkey/singlecell/run/NimbleHelper.java | 1 + 19 files changed, 243 insertions(+), 21 deletions(-) create mode 100644 SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/JobResourceSettings.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/JobResourceSettings.java index 2aa188ed2..2f7a9f122 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/JobResourceSettings.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/JobResourceSettings.java @@ -2,6 +2,7 @@ import org.labkey.api.data.Container; +import java.util.Collection; import java.util.List; /** @@ -12,4 +13,6 @@ public interface JobResourceSettings boolean isAvailable(Container c); List getParams(); + + Collection getDockerVolumes(Container c); } diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PipelineContext.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PipelineContext.java index 506cb6ea4..af718ce46 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PipelineContext.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PipelineContext.java @@ -16,11 +16,11 @@ package org.labkey.api.sequenceanalysis.pipeline; import org.apache.logging.log4j.Logger; -import org.apache.logging.log4j.LogManager; import org.labkey.api.pipeline.PipelineJob; import org.labkey.api.pipeline.WorkDirectory; import java.io.File; +import java.util.Collection; /** * User: bimber @@ -51,4 +51,6 @@ public interface PipelineContext * This is the directory where the source files were located. In the situation where this is a split job, forceParent=true will return the parent job's sourceDirectory. This can be important if files are written here prior to split. */ File getSourceDirectory(boolean forceParent); + + Collection getDockerVolumes(); } diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java index f2e52eb19..64097cd84 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java @@ -19,6 +19,7 @@ import htsjdk.samtools.SAMFileHeader; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.Nullable; +import org.labkey.api.data.Container; import org.labkey.api.pipeline.PipelineJob; import org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.sequenceanalysis.SequenceOutputFile; @@ -26,6 +27,7 @@ import java.io.File; import java.io.IOException; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Set; @@ -98,6 +100,8 @@ static public void setInstance(SequencePipelineService instance) */ abstract public String getDockerCommand(); + abstract public Collection getDockerVolumes(Container c); + abstract public List getSequenceJobInputFiles(PipelineJob job); /** diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index 245274230..8da448235 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -4,6 +4,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Logger; import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker; import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import org.labkey.api.writer.PrintWriters; @@ -17,12 +18,14 @@ public class DockerWrapper extends AbstractCommandWrapper { private final String _containerName; + private final PipelineContext _ctx; private File _tmpDir = null; - public DockerWrapper(String containerName, Logger log) + public DockerWrapper(String containerName, Logger log, PipelineContext ctx) { super(log); _containerName = containerName; + _ctx = ctx; } public void setTmpDir(File tmpDir) @@ -49,6 +52,7 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline writer.println("sudo $DOCKER run --rm=true \\"); writer.println("\t-v \"${WD}:/work\" \\"); writer.println("\t-v \"${HOME}:/homeDir\" \\"); + _ctx.getDockerVolumes().forEach(writer::println); if (_tmpDir != null) { writer.println("\t-v \"" + _tmpDir.getPath() + ":/tmp\" \\"); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index 9b4cfb4c5..338bf6939 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -67,10 +67,31 @@ import org.labkey.sequenceanalysis.button.DownloadSraButton; import org.labkey.sequenceanalysis.button.ReprocessLibraryButton; import org.labkey.sequenceanalysis.button.RunMultiQCButton; -import org.labkey.sequenceanalysis.pipeline.*; +import org.labkey.sequenceanalysis.pipeline.AlignmentAnalysisJob; +import org.labkey.sequenceanalysis.pipeline.AlignmentImportJob; +import org.labkey.sequenceanalysis.pipeline.CacheGenomePipelineJob; +import org.labkey.sequenceanalysis.pipeline.CacheGenomeTrigger; +import org.labkey.sequenceanalysis.pipeline.ConvertToCramHandler; +import org.labkey.sequenceanalysis.pipeline.IlluminaImportJob; +import org.labkey.sequenceanalysis.pipeline.ImportFastaSequencesPipelineJob; +import org.labkey.sequenceanalysis.pipeline.ImportGenomeTrackPipelineJob; +import org.labkey.sequenceanalysis.pipeline.OrphanFilePipelineProvider; +import org.labkey.sequenceanalysis.pipeline.ProcessVariantsHandler; +import org.labkey.sequenceanalysis.pipeline.ReadsetImportJob; +import org.labkey.sequenceanalysis.pipeline.ReblockGvcfHandler; +import org.labkey.sequenceanalysis.pipeline.ReferenceLibraryPipelineProvider; +import org.labkey.sequenceanalysis.pipeline.SequenceAlignmentJob; +import org.labkey.sequenceanalysis.pipeline.SequenceAlignmentTask; +import org.labkey.sequenceanalysis.pipeline.SequenceJob; +import org.labkey.sequenceanalysis.pipeline.SequenceJobSupportImpl; +import org.labkey.sequenceanalysis.pipeline.SequenceOutputHandlerPipelineProvider; +import org.labkey.sequenceanalysis.pipeline.SequencePipelineProvider; +import org.labkey.sequenceanalysis.pipeline.SequenceReadsetHandlerPipelineProvider; +import org.labkey.sequenceanalysis.pipeline.VariantProcessingJob; import org.labkey.sequenceanalysis.query.SequenceAnalysisUserSchema; import org.labkey.sequenceanalysis.query.SequenceTriggerHelper; import org.labkey.sequenceanalysis.run.RestoreSraDataHandler; +import org.labkey.sequenceanalysis.run.alignment.BWAMem2Wrapper; import org.labkey.sequenceanalysis.run.alignment.BWAMemWrapper; import org.labkey.sequenceanalysis.run.alignment.BWASWWrapper; import org.labkey.sequenceanalysis.run.alignment.BWAWrapper; @@ -82,7 +103,27 @@ import org.labkey.sequenceanalysis.run.alignment.Pbmm2Wrapper; import org.labkey.sequenceanalysis.run.alignment.StarWrapper; import org.labkey.sequenceanalysis.run.alignment.VulcanWrapper; -import org.labkey.sequenceanalysis.run.analysis.*; +import org.labkey.sequenceanalysis.run.analysis.BamIterator; +import org.labkey.sequenceanalysis.run.analysis.BcftoolsFillTagsStep; +import org.labkey.sequenceanalysis.run.analysis.BcftoolsFixploidyStep; +import org.labkey.sequenceanalysis.run.analysis.DeepVariantAnalysis; +import org.labkey.sequenceanalysis.run.analysis.ExportOverlappingReadsAnalysis; +import org.labkey.sequenceanalysis.run.analysis.GenrichStep; +import org.labkey.sequenceanalysis.run.analysis.HaplotypeCallerAnalysis; +import org.labkey.sequenceanalysis.run.analysis.ImmunoGenotypingAnalysis; +import org.labkey.sequenceanalysis.run.analysis.LofreqAnalysis; +import org.labkey.sequenceanalysis.run.analysis.MergeLoFreqVcfHandler; +import org.labkey.sequenceanalysis.run.analysis.NextCladeHandler; +import org.labkey.sequenceanalysis.run.analysis.PARalyzerAnalysis; +import org.labkey.sequenceanalysis.run.analysis.PangolinHandler; +import org.labkey.sequenceanalysis.run.analysis.PbsvAnalysis; +import org.labkey.sequenceanalysis.run.analysis.PbsvJointCallingHandler; +import org.labkey.sequenceanalysis.run.analysis.PindelAnalysis; +import org.labkey.sequenceanalysis.run.analysis.SequenceBasedTypingAnalysis; +import org.labkey.sequenceanalysis.run.analysis.SnpCountAnalysis; +import org.labkey.sequenceanalysis.run.analysis.SubreadAnalysis; +import org.labkey.sequenceanalysis.run.analysis.UnmappedReadExportHandler; +import org.labkey.sequenceanalysis.run.analysis.ViralAnalysis; import org.labkey.sequenceanalysis.run.assembly.TrinityRunner; import org.labkey.sequenceanalysis.run.bampostprocessing.AddOrReplaceReadGroupsStep; import org.labkey.sequenceanalysis.run.bampostprocessing.BaseQualityScoreRecalibrator; @@ -116,7 +157,27 @@ import org.labkey.sequenceanalysis.run.util.GenomicsDBAppendHandler; import org.labkey.sequenceanalysis.run.util.GenomicsDBImportHandler; import org.labkey.sequenceanalysis.run.util.SVAnnotateStep; -import org.labkey.sequenceanalysis.run.variant.*; +import org.labkey.sequenceanalysis.run.variant.DepthOfCoverageHandler; +import org.labkey.sequenceanalysis.run.variant.GenotypeConcordanceStep; +import org.labkey.sequenceanalysis.run.variant.GenotypeFiltrationStep; +import org.labkey.sequenceanalysis.run.variant.KingInferenceStep; +import org.labkey.sequenceanalysis.run.variant.MendelianViolationReportStep; +import org.labkey.sequenceanalysis.run.variant.MergeVcfsAndGenotypesHandler; +import org.labkey.sequenceanalysis.run.variant.MultiAllelicPositionsHandler; +import org.labkey.sequenceanalysis.run.variant.PlinkPcaStep; +import org.labkey.sequenceanalysis.run.variant.SNPEffStep; +import org.labkey.sequenceanalysis.run.variant.SampleRenameStep; +import org.labkey.sequenceanalysis.run.variant.SelectSNVsStep; +import org.labkey.sequenceanalysis.run.variant.SelectSamplesStep; +import org.labkey.sequenceanalysis.run.variant.SelectVariantsStep; +import org.labkey.sequenceanalysis.run.variant.SplitVcfBySamplesStep; +import org.labkey.sequenceanalysis.run.variant.SummarizeGenotypeQualityStep; +import org.labkey.sequenceanalysis.run.variant.VariantAnnotatorStep; +import org.labkey.sequenceanalysis.run.variant.VariantEvalBySampleStep; +import org.labkey.sequenceanalysis.run.variant.VariantEvalStep; +import org.labkey.sequenceanalysis.run.variant.VariantFiltrationStep; +import org.labkey.sequenceanalysis.run.variant.VariantQCStep; +import org.labkey.sequenceanalysis.run.variant.VariantsToTableStep; import org.labkey.sequenceanalysis.util.Barcoder; import org.labkey.sequenceanalysis.util.ChainFileValidator; import org.labkey.sequenceanalysis.util.ScatterGatherUtils; @@ -237,6 +298,7 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new BowtieWrapper.Provider()); SequencePipelineService.get().registerPipelineStep(new Bowtie2Wrapper.Provider()); SequencePipelineService.get().registerPipelineStep(new BWAMemWrapper.Provider()); + SequencePipelineService.get().registerPipelineStep(new BWAMem2Wrapper.Provider()); SequencePipelineService.get().registerPipelineStep(new BWAWrapper.Provider()); SequencePipelineService.get().registerPipelineStep(new BWASWWrapper.Provider()); SequencePipelineService.get().registerPipelineStep(new MosaikWrapper.Provider()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java index 8c9142869..9716cac61 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java @@ -8,6 +8,7 @@ import org.jetbrains.annotations.Nullable; import org.junit.Assert; import org.junit.Test; +import org.labkey.api.data.Container; import org.labkey.api.pipeline.PipelineJob; import org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.pipeline.PipelineJobService; @@ -43,6 +44,7 @@ import java.lang.reflect.ParameterizedType; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -457,6 +459,29 @@ public String getDockerCommand() return "docker"; } + @Override + public Collection getDockerVolumes(Container c) + { + if (PipelineJobService.get().getLocationType() != PipelineJobService.LocationType.WebServer) + { + throw new IllegalArgumentException("SequencePipelineService.getDockerVolumes() should only be called from the webserver!"); + } + + Set volumeLines = new HashSet<>(); + for (JobResourceSettings settings : SequencePipelineServiceImpl.get().getResourceSettings()) + { + if (settings.isAvailable(c)) + { + for (String volume : settings.getDockerVolumes(c)) + { + volumeLines.add("-v '" + volume + "':'" + volume + "'"); + } + } + } + + return volumeLines; + } + @Override public List getSequenceJobInputFiles(PipelineJob job) { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java index 7d6f6e9e1..bd4e1cb29 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java @@ -191,7 +191,7 @@ else if (genomeIds.isEmpty()) { ctx.getLogger().debug("Running GLNexus for contig: " + r.getSequenceName()); ctx.getJob().setStatus(PipelineJob.TaskStatus.running, "Processing: " + r.getSequenceName()); - new GLNexusWrapper(ctx.getLogger()).execute(inputVcfs, contigVcf, ctx.getFileManager(), binVersion, configType, r); + new GLNexusWrapper(ctx.getLogger()).execute(inputVcfs, contigVcf, ctx.getFileManager(), binVersion, configType, r, ctx); vcfs.add(contigVcf); try { @@ -261,7 +261,7 @@ private File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTr } } - public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker tracker, String binVersion, String configType, SAMSequenceRecord rec) throws PipelineJobException + public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker tracker, String binVersion, String configType, SAMSequenceRecord rec, JobContext ctx) throws PipelineJobException { File workDir = outputVcf.getParentFile(); tracker.addIntermediateFile(outputVcf); @@ -291,6 +291,7 @@ public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker writer.println("sudo $DOCKER run --rm=true \\"); writer.println("\t-v \"${WD}:/work\" \\"); writer.println("\t-v \"${HOME}:/homeDir\" \\"); + ctx.getDockerVolumes().forEach(writer::println); writer.println("\t -w /work \\"); if (!StringUtils.isEmpty(System.getenv("TMPDIR"))) { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/JobContextImpl.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/JobContextImpl.java index cd40c8a6f..1c09535e6 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/JobContextImpl.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/JobContextImpl.java @@ -14,6 +14,7 @@ import java.io.File; import java.util.Arrays; +import java.util.Collection; import java.util.LinkedHashSet; /** @@ -128,4 +129,9 @@ public LinkedHashSet getActions() { return _actions; } + + public Collection getDockerVolumes() + { + return _job.getDockerVolumes(); + } } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java index f4c1c4826..e1cc31cbd 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java @@ -36,6 +36,7 @@ import org.labkey.api.sequenceanalysis.SequenceOutputFile; import org.labkey.api.sequenceanalysis.pipeline.HasJobParams; import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputTracker; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import org.labkey.api.settings.AppProps; import org.labkey.api.util.FileType; import org.labkey.api.util.FileUtil; @@ -73,6 +74,7 @@ public class SequenceJob extends PipelineJob implements FileAnalysisJobSupport, private List _inputFiles; private List _outputsToCreate = new ArrayList<>(); private PipeRoot _folderFileRoot; + private Collection _dockerVolumes; transient private JSONObject _params; @@ -104,6 +106,7 @@ protected SequenceJob(SequenceJob parentJob, String jobName, String subdirectory _folderPrefix = parentJob._folderPrefix; _inputFiles = parentJob._inputFiles; _folderFileRoot = parentJob._folderFileRoot; + _dockerVolumes = parentJob._dockerVolumes; _params = parentJob.getParameterJson(); @@ -133,6 +136,7 @@ public SequenceJob(String providerName, Container c, User u, @Nullable String jo writeParameters(params); _folderFileRoot = c.isWorkbook() ? PipelineService.get().findPipelineRoot(c.getParent()) : pipeRoot; + _dockerVolumes = SequencePipelineService.get().getDockerVolumes(c); setLogFile(_getLogFile()); writeSupportToDisk(); @@ -182,6 +186,16 @@ public void setFolderFileRoot(PipeRoot folderFileRoot) _folderFileRoot = folderFileRoot; } + public Collection getDockerVolumes() + { + return Collections.unmodifiableCollection(_dockerVolumes); + } + + public void setDockerVolumes(Collection dockerVolumes) + { + _dockerVolumes = dockerVolumes; + } + public void setDescription(String description) { _description = description; diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceTaskHelper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceTaskHelper.java index 8ee1415e6..97b1ad53f 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceTaskHelper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceTaskHelper.java @@ -50,6 +50,7 @@ import java.io.IOException; import java.net.InetAddress; import java.net.UnknownHostException; +import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; @@ -344,4 +345,9 @@ public void cacheExpDatasForParams() throws PipelineJobException } } } + + public Collection getDockerVolumes() + { + return _job.getDockerVolumes(); + } } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java new file mode 100644 index 000000000..e57fdf757 --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java @@ -0,0 +1,83 @@ +package org.labkey.sequenceanalysis.run.alignment; + +import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.Logger; +import org.jetbrains.annotations.Nullable; +import org.json.JSONObject; +import org.labkey.api.pipeline.PipelineJob; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.sequenceanalysis.model.Readset; +import org.labkey.api.sequenceanalysis.pipeline.AbstractAlignmentStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.AlignmentOutputImpl; +import org.labkey.api.sequenceanalysis.pipeline.AlignmentStep; +import org.labkey.api.sequenceanalysis.pipeline.AlignmentStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.CommandLineParam; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; +import org.labkey.api.sequenceanalysis.pipeline.SamtoolsRunner; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.util.FileUtil; +import org.labkey.sequenceanalysis.util.SequenceUtil; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * User: bimber + * Date: 6/14/2014 + * Time: 8:35 AM + */ +public class BWAMem2Wrapper extends BWAMemWrapper +{ + public BWAMem2Wrapper(@Nullable Logger logger) + { + super(logger); + } + + public static class BWAMem2AlignmentStep extends BWAAlignmentStep + { + public BWAMem2AlignmentStep(AlignmentStepProvider provider, PipelineContext ctx) + { + super(provider, ctx, new BWAMem2Wrapper(ctx.getLogger())); + } + + @Override + public boolean doAddReadGroups() + { + return false; + } + } + + public static class Provider extends AbstractAlignmentStepProvider + { + public Provider() + { + super("BWA-Mem2", null, Arrays.asList( + ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("-a"), "outputAll", "Output All Hits", "Output all found alignments for single-end or unpaired paired-end reads. These alignments will be flagged as secondary alignments.", "checkbox", new JSONObject(){{ + put("checked", false); + }}, true), + ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("-M"), "markSplit", "Mark Shorter Hits As Secondary", "Mark shorter split hits as secondary (for Picard compatibility).", "checkbox", new JSONObject(){{ + put("checked", true); + }}, true), + ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("-k"), "minSeedLength", "Min Seed Length", "Matches shorter than this value will be missed. The alignment speed is usually insensitive to this value unless it significantly deviates 20. Default value: 19", "ldk-integerfield", new JSONObject(){{ + + }}, null) + ), null, "https://github.com/bwa-mem2/bwa-mem2", true, true); + } + + @Override + public BWAMem2AlignmentStep create(PipelineContext context) + { + return new BWAMem2AlignmentStep(this, context); + } + } + + @Override + public File getExe() + { + return SequencePipelineService.get().getExeForPackage("BWAPATH", "bwa-mem2"); + } +} diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index a6f4605a6..fa1335126 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -42,7 +42,10 @@ public ParagraphStep() ToolParameterDescriptor.createExpDataParam("svVCF", "Input VCF", "This is the DataId of the VCF containing the SVs to genotype", "ldk-expdatafield", new JSONObject() {{ put("allowBlank", false); - }}, null) + }}, null), + ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{ + put("checked", true); + }}, false) )); } @@ -170,7 +173,7 @@ else if (!svVcf.exists()) } ctx.getFileManager().addIntermediateFile(coverageFile); - DockerWrapper dockerWrapper = new DockerWrapper("ghcr.io/bimberlabinternal/paragraph:latest", ctx.getLogger()); + DockerWrapper dockerWrapper = new DockerWrapper("ghcr.io/bimberlabinternal/paragraph:latest", ctx.getLogger(), ctx); List paragraphArgs = new ArrayList<>(); paragraphArgs.add("/opt/paragraph/bin/multigrmpy.py"); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java index b49680702..667eb97b2 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java @@ -158,7 +158,7 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc getWrapper().setOutputDir(outputDir); getWrapper().setWorkingDir(outputDir); - getWrapper().execute(inputBam, referenceGenome.getWorkingFastaFile(), outputFile, retainVcf, output, binVersion, args); + getWrapper().execute(inputBam, referenceGenome.getWorkingFastaFile(), outputFile, retainVcf, output, binVersion, args, getPipelineCtx()); output.addOutput(outputFile, "gVCF File"); output.addSequenceOutput(outputFile, outputFile.getName(), "DeepVariant gVCF File", rs.getReadsetId(), null, referenceGenome.getGenomeId(), "DeepVariant Version: " + binVersion); @@ -220,7 +220,7 @@ private File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTr } } - public void execute(File inputBam, File refFasta, File outputGvcf, boolean retainVcf, PipelineOutputTracker tracker, String binVersion, List extraArgs) throws PipelineJobException + public void execute(File inputBam, File refFasta, File outputGvcf, boolean retainVcf, PipelineOutputTracker tracker, String binVersion, List extraArgs, PipelineContext ctx) throws PipelineJobException { File workDir = outputGvcf.getParentFile(); File outputVcf = new File(outputGvcf.getPath().replaceAll(".g.vcf", ".vcf")); @@ -270,6 +270,7 @@ public void execute(File inputBam, File refFasta, File outputGvcf, boolean retai writer.println("sudo $DOCKER run --rm=true \\"); writer.println("\t-v \"${WD}:/work\" \\"); writer.println("\t-v \"${HOME}:/homeDir\" \\"); + ctx.getDockerVolumes().forEach(writer::println); if (!StringUtils.isEmpty(System.getenv("TMPDIR"))) { writer.println("\t-v \"${TMPDIR}:/tmp\" \\"); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/LofreqAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/LofreqAnalysis.java index dc1f43a6c..b561b320b 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/LofreqAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/LofreqAnalysis.java @@ -779,9 +779,9 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc if (runPangolinAndNextClade) { PangolinHandler.PANGO_MODE pangoMode = PangolinHandler.PANGO_MODE.valueOf(getProvider().getParameterByName(PangolinHandler.PANGO_MODE.class.getSimpleName()).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class, PangolinHandler.PANGO_MODE.both.name())); - pangolinData = PangolinHandler.runPangolin(outputDir, consensusFastaLoFreq, output, getPipelineCtx().getLogger(), pangoMode); + pangolinData = PangolinHandler.runPangolin(outputDir, consensusFastaLoFreq, output, getPipelineCtx().getLogger(), pangoMode, getPipelineCtx()); - File json = NextCladeHandler.runNextClade(consensusFastaLoFreq, getPipelineCtx().getLogger(), output, outputDir); + File json = NextCladeHandler.runNextClade(consensusFastaLoFreq, getPipelineCtx().getLogger(), output, outputDir, getPipelineCtx()); output.addSequenceOutput(json, "Nextclade: " + rs.getName(), "NextClade JSON", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null); } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java index df8ab3ee7..0124cc0ae 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java @@ -26,6 +26,7 @@ import org.labkey.api.sequenceanalysis.SequenceAnalysisService; import org.labkey.api.sequenceanalysis.SequenceOutputFile; import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker; import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; @@ -127,7 +128,7 @@ public void processFilesRemote(List inputFiles, JobContext c { for (SequenceOutputFile so : inputFiles) { - File nextCladeJson = runNextClade(so.getFile(), ctx.getLogger(), ctx.getFileManager(), ctx.getWorkingDirectory()); + File nextCladeJson = runNextClade(so.getFile(), ctx.getLogger(), ctx.getFileManager(), ctx.getWorkingDirectory(), ctx); ctx.getFileManager().addSequenceOutput(nextCladeJson, "Nextclade: " + so.getName(), NEXTCLADE_JSON, so.getReadset(), null, so.getLibrary_id(), null); } } @@ -138,7 +139,7 @@ public static File getJsonFile(File outputDir, File consensusFasta) return new File(outputDir, FileUtil.getBaseName(consensusFasta) + ".json"); } - public static File runNextClade(File consensusFasta, Logger log, PipelineOutputTracker tracker, File outputDir) throws PipelineJobException + public static File runNextClade(File consensusFasta, Logger log, PipelineOutputTracker tracker, File outputDir, PipelineContext ctx) throws PipelineJobException { if (!consensusFasta.getParentFile().equals(outputDir)) { @@ -183,6 +184,7 @@ public static File runNextClade(File consensusFasta, Logger log, PipelineOutputT } writer.println("\t-v \"${WD}:/work\" \\"); + ctx.getDockerVolumes().forEach(writer::println); writer.println("\t-u $UID \\"); writer.println("\t-e USERID=$UID \\"); writer.println("\t-w /work \\"); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java index 00410a095..f2055e5a5 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java @@ -26,6 +26,7 @@ import org.labkey.api.reader.Readers; import org.labkey.api.sequenceanalysis.SequenceOutputFile; import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker; import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; @@ -235,7 +236,7 @@ public void processFilesRemote(List inputFiles, JobContext c for (SequenceOutputFile so : inputFiles) { PangolinHandler.PANGO_MODE pangoMode = PangolinHandler.PANGO_MODE.valueOf(ctx.getParams().optString(PangolinHandler.PANGO_MODE.class.getSimpleName(), PANGO_MODE.both.name())); - Map pangolinData = runPangolin(ctx.getWorkingDirectory(), so.getFile(), ctx.getFileManager(), ctx.getLogger(), pangoMode); + Map pangolinData = runPangolin(ctx.getWorkingDirectory(), so.getFile(), ctx.getFileManager(), ctx.getLogger(), pangoMode, ctx); List vals = new ArrayList<>(); vals.add(String.valueOf(so.getRowid())); for (String key : PANGO_FIELDS) @@ -263,7 +264,7 @@ public static File getRenamedPangolinOutput(File consensusFasta, PANGO_MODE mode return new File(consensusFasta.getParentFile(), FileUtil.getBaseName(consensusFasta) + "." + mode.name() + ".pangolin.csv"); } - private static File runUsingDocker(File outputDir, Logger log, File consensusFasta, PipelineOutputTracker tracker, List extraArgs) throws PipelineJobException + private static File runUsingDocker(File outputDir, Logger log, File consensusFasta, PipelineOutputTracker tracker, List extraArgs, PipelineContext ctx) throws PipelineJobException { if (!consensusFasta.getParentFile().equals(outputDir)) { @@ -307,6 +308,7 @@ private static File runUsingDocker(File outputDir, Logger log, File consensusFas String extraArgString = extraArgs == null ? "" : " " + StringUtils.join(extraArgs, " "); writer.println("\t-v \"${WD}:/work\" \\"); + ctx.getDockerVolumes().forEach(writer::println); writer.println("\t-u $UID \\"); writer.println("\t-e USERID=$UID \\"); writer.println("\t-w /work \\"); @@ -335,7 +337,7 @@ private static File runUsingDocker(File outputDir, Logger log, File consensusFas return output; } - public static Map runPangolin(File workDir, File consensusFasta, PipelineOutputTracker tracker, Logger log, PANGO_MODE pangoMode) throws PipelineJobException + public static Map runPangolin(File workDir, File consensusFasta, PipelineOutputTracker tracker, Logger log, PANGO_MODE pangoMode, PipelineContext ctx) throws PipelineJobException { List modes = PANGO_MODE.getModes(pangoMode); @@ -344,7 +346,7 @@ public static Map runPangolin(File workDir, File consensusFasta, for (PANGO_MODE mode : modes) { List extraArgs = mode == PANGO_MODE.usher ? Collections.singletonList("--usher") : null; - File output = runUsingDocker(workDir, log, consensusFasta, tracker, extraArgs); + File output = runUsingDocker(workDir, log, consensusFasta, tracker, extraArgs, ctx); try { diff --git a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java index d97f01de3..1f2b4fabe 100644 --- a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java +++ b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java @@ -350,6 +350,7 @@ public static void executeR(SequenceOutputHandler.JobContext ctx, String dockerC File tmpDir = new File(SequencePipelineService.get().getJavaTempDir()); writer.println("\t-v \"${WD}:/work\" \\"); writer.println("\t-v \"" + tmpDir.getPath() + ":/tmp\" \\"); + ctx.getDockerVolumes().forEach(writer::println); writer.println("\t-v \"${HOME}:/homeDir\" \\"); writer.println("\t-u $UID \\"); writer.println("\t-e USERID=$UID \\"); diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index 2675311c2..72aa57964 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -31,6 +31,7 @@ import org.labkey.api.security.User; import org.labkey.api.sequenceanalysis.SequenceOutputFile; import org.labkey.api.sequenceanalysis.model.Readset; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker; import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; @@ -635,7 +636,7 @@ public File generateHashingCallsForRawMatrix(Readset parentReadset, PipelineOutp if (!doneFile.exists()) { - callsFile = generateCellHashingCalls(rawCountMatrixDir, ctx.getOutputDir(), outputBasename, ctx.getLogger(), ctx.getSourceDirectory(), parameters); + callsFile = generateCellHashingCalls(rawCountMatrixDir, ctx.getOutputDir(), outputBasename, ctx.getLogger(), ctx.getSourceDirectory(), parameters, ctx); try { @@ -1195,7 +1196,7 @@ private File getMolInfoFileFromCounts(File citeSeqCountOutDir) return new File(citeSeqCountOutDir.getParentFile(), "molecule_info.h5"); } - public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, String basename, Logger log, File localPipelineDir, CellHashingService.CellHashingParameters parameters) throws PipelineJobException + public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, String basename, Logger log, File localPipelineDir, CellHashingService.CellHashingParameters parameters, PipelineContext ctx) throws PipelineJobException { log.debug("generating final calls from folder: " + citeSeqCountOutDir.getPath()); @@ -1325,6 +1326,7 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St writer.println("\t-e CELLHASHR_DEBUG=1 \\"); writer.println("\t-v \"${WD}:/work\" \\"); + ctx.getDockerVolumes().forEach(writer::println); writer.println("\t-v \"${HOME}:/homeDir\" \\"); writer.println("\t-u $UID \\"); writer.println("\t-e USERID=$UID \\"); diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index 9f0ff010f..579446acd 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -602,6 +602,7 @@ private boolean runUsingDocker(List nimbleArgs, PipelineStepOutput outpu writer.println("\t--memory='" + maxRam + "g' \\"); } + getPipelineCtx().getDockerVolumes().forEach(writer::println); writer.println("\t-v \"${WD}:/work\" \\"); writer.println("\t-v \"${HOME}:/homeDir\" \\"); writer.println("\t-u $UID \\"); From 66fb1d38be9afdbe3a204306d8067a240001fbf6 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 15 Nov 2024 15:54:35 -0800 Subject: [PATCH 24/37] Missed with last commit --- .../labkey/sequenceanalysis/analysis/DeepVariantHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/DeepVariantHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/DeepVariantHandler.java index 249e53344..04d2c919e 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/DeepVariantHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/DeepVariantHandler.java @@ -125,7 +125,7 @@ public void processFilesRemote(List inputFiles, JobContext c } boolean retainVcf = ctx.getParams().optBoolean("retainVcf", false); - wrapper.execute(so.getFile(), referenceGenome.getWorkingFastaFile(), outputFile, retainVcf, ctx.getFileManager(), binVersion, args); + wrapper.execute(so.getFile(), referenceGenome.getWorkingFastaFile(), outputFile, retainVcf, ctx.getFileManager(), binVersion, args, ctx); action.addOutput(outputFile, "gVCF File", false); From 1670bcfa7527fa7fbf6376b720e930bea92ed5a8 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 16 Nov 2024 08:02:59 -0800 Subject: [PATCH 25/37] Add option for paragraph to remove BNDs --- .../run/alignment/ParagraphStep.java | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index fa1335126..0b6d16e74 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -18,6 +18,7 @@ import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; import org.labkey.api.sequenceanalysis.run.DockerWrapper; +import org.labkey.api.sequenceanalysis.run.SelectVariantsWrapper; import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; import org.labkey.api.util.FileUtil; import org.labkey.api.writer.PrintWriters; @@ -43,6 +44,9 @@ public ParagraphStep() {{ put("allowBlank", false); }}, null), + ToolParameterDescriptor.create("doBndSubset", "Remove BNDs", "If the reference VCF contains BNDs, selecting this option will cause the job to remove them prior to paragraph", "checkbox", new JSONObject(){{ + put("checked", true); + }}, false), ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{ put("checked", true); }}, false) @@ -106,6 +110,32 @@ else if (!svVcf.exists()) throw new PipelineJobException("Missing file: " + svVcf.getPath()); } + boolean doBndSubset = ctx.getParams().optBoolean("doBndSubset", false); + if (doBndSubset) + { + File vcfNoBnd = new File(ctx.getOutputDir(), SequenceAnalysisService.get().getUnzippedBaseName(svVcf.getName()) + "nobnd.vcf.gz"); + File vcfNoBndIdx = new File(vcfNoBnd.getPath() + ".tbi"); + if (vcfNoBndIdx.exists()) + { + ctx.getLogger().debug("Index exists, will no repeat BND subset"); + } + else + { + SelectVariantsWrapper svw = new SelectVariantsWrapper(ctx.getLogger()); + List selectArgs = new ArrayList<>(); + selectArgs.add("-select"); + selectArgs.add("SVTYPE != 'BND'"); + selectArgs.add("--exclude-filtered"); + selectArgs.add("--exclude-non-variants"); + + svw.execute(ctx.getSequenceSupport().getCachedGenome(inputFiles.get(0).getLibrary_id()).getWorkingFastaFile(), svVcf, vcfNoBnd, selectArgs); + + ctx.getFileManager().addIntermediateFile(vcfNoBnd); + ctx.getFileManager().addIntermediateFile(vcfNoBndIdx); + svVcf = vcfNoBnd; + } + } + Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); for (SequenceOutputFile so : inputFiles) { From b0df25a9dfa4816f9925af898161b16311ebcd6b Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 16 Nov 2024 10:45:09 -0800 Subject: [PATCH 26/37] Fix path to nimble HTML --- .../org/labkey/singlecell/run/NimbleHelper.java | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index 579446acd..2404f8873 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -288,7 +288,7 @@ public void doNimbleAlign(File bam, PipelineStepOutput output, Readset rs, Strin description += "\nscore_percent: " + genome.getScorePercent(); } - output.addSequenceOutput(results, basename + ": nimble align", "Nimble Alignment", rs.getRowId(), null, genome.getGenomeId(), description); + output.addSequenceOutput(results, basename + ": nimble align", "Nimble Results", rs.getRowId(), null, genome.getGenomeId(), description); File reportHtml = getReportHtmlFileFromResults(results); if (!reportHtml.exists()) @@ -300,17 +300,7 @@ public void doNimbleAlign(File bam, PipelineStepOutput output, Readset rs, Strin } else { - output.addSequenceOutput(results, basename + ": nimble report", "Nimble Report", rs.getRowId(), null, genome.getGenomeId(), description); - } - - File outputBam = new File(results.getPath().replaceAll("results." + genome.genomeId + ".txt.gz", "nimbleAlignment." + genome.genomeId + ".bam")); - if (outputBam.exists()) - { - output.addSequenceOutput(outputBam, basename + ": nimble align", "Nimble Alignment", rs.getRowId(), null, genome.getGenomeId(), description); - } - else - { - getPipelineCtx().getLogger().debug("BAM not found: " + outputBam.getPath()); + output.addSequenceOutput(reportHtml, basename + ": nimble report", "Nimble Report", rs.getRowId(), null, genome.getGenomeId(), description); } } } From 28ed55b24041bf83ad833ac89866d6834a51d99a Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 16 Nov 2024 16:33:31 -0800 Subject: [PATCH 27/37] Add null check --- .../src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java index e1cc31cbd..32dc295ed 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java @@ -188,7 +188,7 @@ public void setFolderFileRoot(PipeRoot folderFileRoot) public Collection getDockerVolumes() { - return Collections.unmodifiableCollection(_dockerVolumes); + return _dockerVolumes == null ? Collections.emptySet() : Collections.unmodifiableCollection(_dockerVolumes); } public void setDockerVolumes(Collection dockerVolumes) From b97713f3522cfc578078e0cd45edb96f4cf1d6fa Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 17 Nov 2024 06:18:59 -0800 Subject: [PATCH 28/37] Fix docker syntax --- .../org/labkey/api/sequenceanalysis/run/DockerWrapper.java | 2 +- .../org/labkey/sequenceanalysis/analysis/GLNexusHandler.java | 2 +- .../sequenceanalysis/run/analysis/DeepVariantAnalysis.java | 2 +- .../labkey/sequenceanalysis/run/analysis/NextCladeHandler.java | 2 +- .../labkey/sequenceanalysis/run/analysis/PangolinHandler.java | 2 +- .../api/singlecell/pipeline/AbstractSingleCellPipelineStep.java | 2 +- .../src/org/labkey/singlecell/CellHashingServiceImpl.java | 2 +- singlecell/src/org/labkey/singlecell/run/NimbleHelper.java | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index 8da448235..12b2158fa 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -52,7 +52,7 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline writer.println("sudo $DOCKER run --rm=true \\"); writer.println("\t-v \"${WD}:/work\" \\"); writer.println("\t-v \"${HOME}:/homeDir\" \\"); - _ctx.getDockerVolumes().forEach(writer::println); + _ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); if (_tmpDir != null) { writer.println("\t-v \"" + _tmpDir.getPath() + ":/tmp\" \\"); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java index bd4e1cb29..61ce01cd2 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java @@ -291,7 +291,7 @@ public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker writer.println("sudo $DOCKER run --rm=true \\"); writer.println("\t-v \"${WD}:/work\" \\"); writer.println("\t-v \"${HOME}:/homeDir\" \\"); - ctx.getDockerVolumes().forEach(writer::println); + ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); writer.println("\t -w /work \\"); if (!StringUtils.isEmpty(System.getenv("TMPDIR"))) { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java index 667eb97b2..0490ed063 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java @@ -270,7 +270,7 @@ public void execute(File inputBam, File refFasta, File outputGvcf, boolean retai writer.println("sudo $DOCKER run --rm=true \\"); writer.println("\t-v \"${WD}:/work\" \\"); writer.println("\t-v \"${HOME}:/homeDir\" \\"); - ctx.getDockerVolumes().forEach(writer::println); + ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); if (!StringUtils.isEmpty(System.getenv("TMPDIR"))) { writer.println("\t-v \"${TMPDIR}:/tmp\" \\"); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java index 0124cc0ae..6e5e4320d 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java @@ -184,7 +184,7 @@ public static File runNextClade(File consensusFasta, Logger log, PipelineOutputT } writer.println("\t-v \"${WD}:/work\" \\"); - ctx.getDockerVolumes().forEach(writer::println); + ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); writer.println("\t-u $UID \\"); writer.println("\t-e USERID=$UID \\"); writer.println("\t-w /work \\"); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java index f2055e5a5..56608dde2 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java @@ -308,7 +308,7 @@ private static File runUsingDocker(File outputDir, Logger log, File consensusFas String extraArgString = extraArgs == null ? "" : " " + StringUtils.join(extraArgs, " "); writer.println("\t-v \"${WD}:/work\" \\"); - ctx.getDockerVolumes().forEach(writer::println); + ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); writer.println("\t-u $UID \\"); writer.println("\t-e USERID=$UID \\"); writer.println("\t-w /work \\"); diff --git a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java index 1f2b4fabe..e5412f2d3 100644 --- a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java +++ b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java @@ -350,7 +350,7 @@ public static void executeR(SequenceOutputHandler.JobContext ctx, String dockerC File tmpDir = new File(SequencePipelineService.get().getJavaTempDir()); writer.println("\t-v \"${WD}:/work\" \\"); writer.println("\t-v \"" + tmpDir.getPath() + ":/tmp\" \\"); - ctx.getDockerVolumes().forEach(writer::println); + ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); writer.println("\t-v \"${HOME}:/homeDir\" \\"); writer.println("\t-u $UID \\"); writer.println("\t-e USERID=$UID \\"); diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index 72aa57964..6aab5bcdc 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -1326,7 +1326,7 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St writer.println("\t-e CELLHASHR_DEBUG=1 \\"); writer.println("\t-v \"${WD}:/work\" \\"); - ctx.getDockerVolumes().forEach(writer::println); + ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); writer.println("\t-v \"${HOME}:/homeDir\" \\"); writer.println("\t-u $UID \\"); writer.println("\t-e USERID=$UID \\"); diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index 2404f8873..862209354 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -592,7 +592,7 @@ private boolean runUsingDocker(List nimbleArgs, PipelineStepOutput outpu writer.println("\t--memory='" + maxRam + "g' \\"); } - getPipelineCtx().getDockerVolumes().forEach(writer::println); + getPipelineCtx().getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); writer.println("\t-v \"${WD}:/work\" \\"); writer.println("\t-v \"${HOME}:/homeDir\" \\"); writer.println("\t-u $UID \\"); From 482aa9f9052217ef36a11c42ad4b4bbb13d1a88b Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 18 Nov 2024 09:10:23 -0800 Subject: [PATCH 29/37] Add action to repeat nimble report/plot steps --- .../labkey/singlecell/SingleCellModule.java | 2 + .../labkey/singlecell/run/NimbleHelper.java | 129 +++++++------ .../run/RepeatNimbleReportHandler.java | 179 ++++++++++++++++++ 3 files changed, 252 insertions(+), 58 deletions(-) create mode 100644 singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java diff --git a/singlecell/src/org/labkey/singlecell/SingleCellModule.java b/singlecell/src/org/labkey/singlecell/SingleCellModule.java index 4ce393e0f..7749acfd1 100644 --- a/singlecell/src/org/labkey/singlecell/SingleCellModule.java +++ b/singlecell/src/org/labkey/singlecell/SingleCellModule.java @@ -109,6 +109,7 @@ import org.labkey.singlecell.run.CellRangerVDJWrapper; import org.labkey.singlecell.run.NimbleAlignmentStep; import org.labkey.singlecell.run.NimbleAnalysis; +import org.labkey.singlecell.run.RepeatNimbleReportHandler; import org.labkey.singlecell.run.VelocytoAlignmentStep; import org.labkey.singlecell.run.VelocytoAnalysisStep; @@ -221,6 +222,7 @@ public static void registerPipelineSteps() SequenceAnalysisService.get().registerFileHandler(new CellRangerRawDataHandler()); SequenceAnalysisService.get().registerFileHandler(new ProcessSingleCellHandler()); SequenceAnalysisService.get().registerFileHandler(new ProcessSeuratObjectHandler()); + SequenceAnalysisService.get().registerFileHandler(new RepeatNimbleReportHandler()); //Single-cell: SequencePipelineService.get().registerPipelineStep(new AppendCiteSeq.Provider()); diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index 862209354..fe3431739 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -53,6 +53,8 @@ public class NimbleHelper private final PipelineStepProvider _provider; private final int _stepIdx; + public static final String NIMBLE_REPORT_CATEGORY = "Nimble Report"; + public NimbleHelper(PipelineContext ctx, PipelineStepProvider provider, int stepIdx) { _ctx = ctx; @@ -300,7 +302,7 @@ public void doNimbleAlign(File bam, PipelineStepOutput output, Readset rs, Strin } else { - output.addSequenceOutput(reportHtml, basename + ": nimble report", "Nimble Report", rs.getRowId(), null, genome.getGenomeId(), description); + output.addSequenceOutput(reportHtml, basename + ": nimble report", NIMBLE_REPORT_CATEGORY, rs.getRowId(), null, genome.getGenomeId(), description); } } } @@ -474,76 +476,82 @@ private Map doAlignment(List genomes, List reportArgs = new ArrayList<>(); - reportArgs.add("python3"); - reportArgs.add("-m"); - reportArgs.add("nimble"); + File reportResultsGz = runNimbleReport(alignResultsGz, genome.genomeId, output, getPipelineCtx()); + resultMap.put(genome, reportResultsGz); + } - reportArgs.add("report"); - reportArgs.add("-i"); - reportArgs.add("/work/" + alignResultsGz.getName()); + return resultMap; + } - File reportResultsGz = new File(getPipelineCtx().getWorkingDirectory(), "reportResults." + genome.genomeId + ".txt"); - if (reportResultsGz.exists()) - { - reportResultsGz.delete(); - } + public static File runNimbleReport(File alignResultsGz, int genomeId, PipelineStepOutput output, PipelineContext ctx) throws PipelineJobException + { + List reportArgs = new ArrayList<>(); + reportArgs.add("python3"); + reportArgs.add("-m"); + reportArgs.add("nimble"); - reportArgs.add("-o"); - reportArgs.add("/work/" + reportResultsGz.getName()); + reportArgs.add("report"); + reportArgs.add("-i"); + reportArgs.add("/work/" + alignResultsGz.getName()); - runUsingDocker(reportArgs, output, null); + File reportResultsGz = new File(ctx.getWorkingDirectory(), "reportResults." + genomeId + ".txt"); + if (reportResultsGz.exists()) + { + reportResultsGz.delete(); + } - if (!reportResultsGz.exists()) - { - throw new PipelineJobException("Missing file: " + reportResultsGz.getPath()); - } + reportArgs.add("-o"); + reportArgs.add("/work/" + reportResultsGz.getName()); - resultMap.put(genome, reportResultsGz); + runUsingDocker(reportArgs, output, null, ctx); - if (SequencePipelineService.get().hasMinLineCount(alignResultsGz, 2)) - { - // Also run nimble plot. Always re-run since this is fast: - List plotArgs = new ArrayList<>(); - plotArgs.add("python3"); - plotArgs.add("-m"); - plotArgs.add("nimble"); + if (!reportResultsGz.exists()) + { + throw new PipelineJobException("Missing file: " + reportResultsGz.getPath()); + } - plotArgs.add("plot"); - plotArgs.add("--input_file"); - plotArgs.add("/work/" + alignResultsGz.getName()); + if (SequencePipelineService.get().hasMinLineCount(alignResultsGz, 2)) + { + // Also run nimble plot. Always re-run since this is fast: + List plotArgs = new ArrayList<>(); + plotArgs.add("python3"); + plotArgs.add("-m"); + plotArgs.add("nimble"); - File plotResultsHtml = getReportHtmlFileFromResults(reportResultsGz); - if (reportResultsGz.exists()) - { - plotResultsHtml.delete(); - } + plotArgs.add("plot"); + plotArgs.add("--input_file"); + plotArgs.add("/work/" + alignResultsGz.getName()); - plotArgs.add("--output_file"); - plotArgs.add("/work/" + plotResultsHtml.getName()); + File plotResultsHtml = getReportHtmlFileFromResults(reportResultsGz); + if (plotResultsHtml.exists()) + { + plotResultsHtml.delete(); + } - runUsingDocker(plotArgs, output, null); + plotArgs.add("--output_file"); + plotArgs.add("/work/" + plotResultsHtml.getName()); - if (!plotResultsHtml.exists()) - { - throw new PipelineJobException("Missing file: " + plotResultsHtml.getPath()); - } - } - else + runUsingDocker(plotArgs, output, null, ctx); + + if (!plotResultsHtml.exists()) { - getPipelineCtx().getLogger().info("Only single line found in results, skipping nimble plot"); + throw new PipelineJobException("Missing file: " + plotResultsHtml.getPath()); } } + else + { + ctx.getLogger().info("Only single line found in results, skipping nimble plot"); + } - return resultMap; + return reportResultsGz; } - private File getReportHtmlFileFromResults(File reportResults) + public static File getReportHtmlFileFromResults(File reportResults) { return new File(reportResults.getPath().replaceAll("txt(.gz)*$", "html")); } - private File getNimbleDoneFile(File parentDir, String resumeString) + private static File getNimbleDoneFile(File parentDir, String resumeString) { return new File(parentDir, "nimble." + resumeString + ".done"); } @@ -552,13 +560,18 @@ private File getNimbleDoneFile(File parentDir, String resumeString) private boolean runUsingDocker(List nimbleArgs, PipelineStepOutput output, @Nullable String resumeString) throws PipelineJobException { - File localBashScript = new File(getPipelineCtx().getWorkingDirectory(), "docker.sh"); - File dockerBashScript = new File(getPipelineCtx().getWorkingDirectory(), "dockerRun.sh"); + return runUsingDocker(nimbleArgs, output, resumeString, getPipelineCtx()); + } + + private static boolean runUsingDocker(List nimbleArgs, PipelineStepOutput output, @Nullable String resumeString, PipelineContext ctx) throws PipelineJobException + { + File localBashScript = new File(ctx.getWorkingDirectory(), "docker.sh"); + File dockerBashScript = new File(ctx.getWorkingDirectory(), "dockerRun.sh"); output.addIntermediateFile(localBashScript); output.addIntermediateFile(dockerBashScript); // Create temp folder: - File tmpDir = new File(getPipelineCtx().getWorkingDirectory(), "tmpDir"); + File tmpDir = new File(ctx.getWorkingDirectory(), "tmpDir"); if (tmpDir.exists()) { try @@ -592,7 +605,7 @@ private boolean runUsingDocker(List nimbleArgs, PipelineStepOutput outpu writer.println("\t--memory='" + maxRam + "g' \\"); } - getPipelineCtx().getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); + ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); writer.println("\t-v \"${WD}:/work\" \\"); writer.println("\t-v \"${HOME}:/homeDir\" \\"); writer.println("\t-u $UID \\"); @@ -623,22 +636,22 @@ private boolean runUsingDocker(List nimbleArgs, PipelineStepOutput outpu File doneFile = null; if (resumeString != null) { - doneFile = getNimbleDoneFile(getPipelineCtx().getWorkingDirectory(), resumeString); + doneFile = getNimbleDoneFile(ctx.getWorkingDirectory(), resumeString); output.addIntermediateFile(doneFile); if (doneFile.exists()) { - getPipelineCtx().getLogger().info("Nimble already completed, resuming: " + resumeString); + ctx.getLogger().info("Nimble already completed, resuming: " + resumeString); return false; } else { - getPipelineCtx().getLogger().debug("done file not found: " + doneFile.getPath()); + ctx.getLogger().debug("done file not found: " + doneFile.getPath()); } } - SimpleScriptWrapper rWrapper = new SimpleScriptWrapper(getPipelineCtx().getLogger()); - rWrapper.setWorkingDir(getPipelineCtx().getWorkingDirectory()); + SimpleScriptWrapper rWrapper = new SimpleScriptWrapper(ctx.getLogger()); + rWrapper.setWorkingDir(ctx.getWorkingDirectory()); rWrapper.execute(Arrays.asList("/bin/bash", localBashScript.getName())); if (doneFile != null) diff --git a/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java new file mode 100644 index 000000000..f53b05383 --- /dev/null +++ b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java @@ -0,0 +1,179 @@ +package org.labkey.singlecell.run; + +import org.apache.commons.text.similarity.SimilarityScoreFrom; +import org.json.JSONObject; +import org.labkey.api.collections.CaseInsensitiveHashMap; +import org.labkey.api.data.ContainerType; +import org.labkey.api.data.SimpleFilter; +import org.labkey.api.data.TableInfo; +import org.labkey.api.data.TableSelector; +import org.labkey.api.exp.api.DataType; +import org.labkey.api.exp.api.ExpData; +import org.labkey.api.exp.api.ExperimentService; +import org.labkey.api.module.ModuleLoader; +import org.labkey.api.pipeline.PipelineJob; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.pipeline.RecordedAction; +import org.labkey.api.query.BatchValidationException; +import org.labkey.api.query.DuplicateKeyException; +import org.labkey.api.query.FieldKey; +import org.labkey.api.query.QueryService; +import org.labkey.api.query.QueryUpdateServiceException; +import org.labkey.api.sequenceanalysis.SequenceOutputFile; +import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.DefaultPipelineStepOutput; +import org.labkey.api.sequenceanalysis.pipeline.PipelineStepOutput; +import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; +import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.util.FileType; +import org.labkey.singlecell.SingleCellModule; +import org.labkey.singlecell.SingleCellSchema; + +import java.io.File; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +public class RepeatNimbleReportHandler extends AbstractParameterizedOutputHandler +{ + public RepeatNimbleReportHandler() + { + super(ModuleLoader.getInstance().getModule(SingleCellModule.class), "Convert To Cram", "This will convert a BAM file to CRAM, replacing the original", null, Arrays.asList( + ToolParameterDescriptor.create("replaceOriginal", "Replace Original File", "If selected, the input BAM will be deleted and the database record will be switched to use this filepath.", "checkbox", new JSONObject(){{ + put("checked", true); + }}, true), + ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{ + put("checked", true); + }}, true) + ) + ); + } + + private static final FileType _nimbleResultsGz = new FileType(".txt.gz", FileType.gzSupportLevel.NO_GZ); + + @Override + public boolean canProcess(SequenceOutputFile o) + { + return o.getFile() != null && o.getFile().exists() && o.getFile().getName().startsWith("reportResults") && _nimbleResultsGz.isType(o.getFile()); + } + + @Override + public boolean useWorkbooks() + { + return true; + } + + @Override + public boolean doSplitJobs() + { + return true; + } + + @Override + public boolean doRunRemote() + { + return true; + } + + @Override + public boolean doRunLocal() + { + return false; + } + + @Override + public SequenceOutputProcessor getProcessor() + { + return new Processor(); + } + + private static class Processor implements SequenceOutputProcessor + { + @Override + public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List inputFiles, JSONObject params, File outputDir, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + + } + + @Override + public void processFilesRemote(List inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException + { + PipelineStepOutput output = new DefaultPipelineStepOutput(); + + for (SequenceOutputFile so : inputFiles) + { + // This is the prior report results: + File alignmentFile = new File(so.getFile().getParentFile(), so.getFile().getName().replaceAll("reportResults", "alignResults")); + if (!alignmentFile.exists()) + { + throw new PipelineJobException("Unable to find file: " + alignmentFile.getPath()); + } + + // This will update these files in-place: + File reportFile = NimbleHelper.runNimbleReport(alignmentFile, so.getLibrary_id(), output, ctx); + if (!reportFile.exists()) + { + throw new PipelineJobException("Unable to find file: " + reportFile.getPath()); + } + + File htmlFile = NimbleHelper.getReportHtmlFileFromResults(reportFile); + if (!htmlFile.exists()) + { + throw new PipelineJobException("Unable to find file: " + htmlFile.getPath()); + } + } + + ctx.getFileManager().addIntermediateFiles(output.getIntermediateFiles()); + } + + @Override + public void complete(JobContext ctx, List inputs, List outputsCreated) throws PipelineJobException + { + // Because the plot output was added later, re-create this if it doesnt exist: + for (SequenceOutputFile so : inputs) + { + File plotFile = NimbleHelper.getReportHtmlFileFromResults(so.getFile()); + + TableInfo ti = QueryService.get().getUserSchema(ctx.getJob().getUser(), so.getContainerObj(), SingleCellSchema.SEQUENCE_SCHEMA_NAME).getTable("outputfiles"); + SimpleFilter filter = new SimpleFilter(FieldKey.fromString("category"), "").addCondition(FieldKey.fromString("dataid/dataFileUrl"), plotFile.toURI().toString()); + TableSelector ts = new TableSelector(ti, filter, null); + if (!ts.exists()) + { + ExpData expData = ExperimentService.get().getExpDataByURL(plotFile, so.getContainerObj()); + if (expData == null) + { + expData = ExperimentService.get().createData(so.getContainerObj(), new DataType("Nimble Results")); + expData.setDataFileURI(plotFile.toURI()); + expData.setName(plotFile.getName()); + expData.save(ctx.getJob().getUser()); + } + + Map toInsert = new CaseInsensitiveHashMap<>(); + toInsert.put("name", so.getName().replaceAll("nimble results", "nimble report")); + toInsert.put("category", NimbleHelper.NIMBLE_REPORT_CATEGORY); + toInsert.put("description", so.getDescription()); + toInsert.put("dataid", expData.getRowId()); + toInsert.put("library_id", so.getLibrary_id()); + toInsert.put("runid", so.getRunId()); + toInsert.put("analysis_id", so.getAnalysis_id()); + + try + { + ti.getUpdateService().insertRows(ctx.getJob().getUser(), so.getContainerObj(), Collections.singletonList(toInsert), new BatchValidationException(), null, null); + } + catch (SQLException | BatchValidationException | QueryUpdateServiceException | DuplicateKeyException e) + { + throw new PipelineJobException(e); + } +; } + else + { + ctx.getLogger().debug("Plot file output exists, will not re-create"); + } + } + } + } +} From 25f7b2b4275b4efcb2ef95def242cf9c9b720236 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 18 Nov 2024 12:37:01 -0800 Subject: [PATCH 30/37] Update paragraph defaults and subset logic --- .../run/alignment/ParagraphStep.java | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index 0b6d16e74..fceba87ac 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -45,10 +45,10 @@ public ParagraphStep() put("allowBlank", false); }}, null), ToolParameterDescriptor.create("doBndSubset", "Remove BNDs", "If the reference VCF contains BNDs, selecting this option will cause the job to remove them prior to paragraph", "checkbox", new JSONObject(){{ - put("checked", true); + put("checked", false); }}, false), ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{ - put("checked", true); + put("checked", false); }}, false) )); } @@ -113,11 +113,11 @@ else if (!svVcf.exists()) boolean doBndSubset = ctx.getParams().optBoolean("doBndSubset", false); if (doBndSubset) { - File vcfNoBnd = new File(ctx.getOutputDir(), SequenceAnalysisService.get().getUnzippedBaseName(svVcf.getName()) + "nobnd.vcf.gz"); + File vcfNoBnd = new File(ctx.getOutputDir(), SequenceAnalysisService.get().getUnzippedBaseName(svVcf.getName()) + "pgSubset.vcf.gz"); File vcfNoBndIdx = new File(vcfNoBnd.getPath() + ".tbi"); if (vcfNoBndIdx.exists()) { - ctx.getLogger().debug("Index exists, will no repeat BND subset"); + ctx.getLogger().debug("Index exists, will no repeat VCF subset"); } else { @@ -125,8 +125,13 @@ else if (!svVcf.exists()) List selectArgs = new ArrayList<>(); selectArgs.add("-select"); selectArgs.add("SVTYPE != 'BND'"); + selectArgs.add("-select"); + selectArgs.add("!(vc.hasAttribute('SVTYPE') && vc.getAttribute('SVTYPE') == 'INS' && vc.hasSymbolicAlleles() && !vc.hasAttribute('SEQ'))"); + selectArgs.add("-select"); + selectArgs.add("POS > 150"); + selectArgs.add("--exclude-filtered"); selectArgs.add("--exclude-filtered"); - selectArgs.add("--exclude-non-variants"); + selectArgs.add("--sites-only-vcf-output"); svw.execute(ctx.getSequenceSupport().getCachedGenome(inputFiles.get(0).getLibrary_id()).getWorkingFastaFile(), svVcf, vcfNoBnd, selectArgs); @@ -173,7 +178,7 @@ else if (!svVcf.exists()) try (PrintWriter writer = PrintWriters.getPrintWriter(coverageFile); SamReader reader = SamReaderFactory.makeDefault().open(so.getFile())) { SAMFileHeader header = reader.getFileHeader(); - if (header.getReadGroups().size() == 0) + if (header.getReadGroups().isEmpty()) { throw new PipelineJobException("No read groups found in input BAM"); } From b0d611e0f2b269b717df85ce68d1a77f9395db52 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 19 Nov 2024 08:59:41 -0800 Subject: [PATCH 31/37] Update paraGRAPH filtering and adjust nimble report FileType --- .../sequenceanalysis/run/alignment/ParagraphStep.java | 6 +----- .../labkey/singlecell/run/RepeatNimbleReportHandler.java | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index fceba87ac..fa929a0ad 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -124,11 +124,7 @@ else if (!svVcf.exists()) SelectVariantsWrapper svw = new SelectVariantsWrapper(ctx.getLogger()); List selectArgs = new ArrayList<>(); selectArgs.add("-select"); - selectArgs.add("SVTYPE != 'BND'"); - selectArgs.add("-select"); - selectArgs.add("!(vc.hasAttribute('SVTYPE') && vc.getAttribute('SVTYPE') == 'INS' && vc.hasSymbolicAlleles() && !vc.hasAttribute('SEQ'))"); - selectArgs.add("-select"); - selectArgs.add("POS > 150"); + selectArgs.add("SVTYPE != 'BND' && POS > 150 && !(vc.hasAttribute('SVTYPE') && vc.getAttribute('SVTYPE') == 'INS' && vc.hasSymbolicAlleles() && !vc.hasAttribute('SEQ'))"); selectArgs.add("--exclude-filtered"); selectArgs.add("--exclude-filtered"); selectArgs.add("--sites-only-vcf-output"); diff --git a/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java index f53b05383..315fe4d80 100644 --- a/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java +++ b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java @@ -52,12 +52,12 @@ public RepeatNimbleReportHandler() ); } - private static final FileType _nimbleResultsGz = new FileType(".txt.gz", FileType.gzSupportLevel.NO_GZ); + private static final FileType _nimbleResultsGz = new FileType(".txt", FileType.gzSupportLevel.SUPPORT_GZ); @Override public boolean canProcess(SequenceOutputFile o) { - return o.getFile() != null && o.getFile().exists() && o.getFile().getName().startsWith("reportResults") && _nimbleResultsGz.isType(o.getFile()); + return o.getFile() != null && o.getFile().exists() && o.getFile().getName().startsWith("reportResults.") && _nimbleResultsGz.isType(o.getFile()); } @Override From 4caf6c0abaf6beb1fbc98bf8f69038676312e5cc Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 19 Nov 2024 12:19:42 -0800 Subject: [PATCH 32/37] Correct name of RepeatNimbleReportHandler --- .../org/labkey/singlecell/run/RepeatNimbleReportHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java index 315fe4d80..0b1b1c0fc 100644 --- a/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java +++ b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java @@ -41,7 +41,7 @@ public class RepeatNimbleReportHandler extends AbstractParameterizedOutputHandle { public RepeatNimbleReportHandler() { - super(ModuleLoader.getInstance().getModule(SingleCellModule.class), "Convert To Cram", "This will convert a BAM file to CRAM, replacing the original", null, Arrays.asList( + super(ModuleLoader.getInstance().getModule(SingleCellModule.class), "Re-run Nimble Report", "This will re-run nimble report and nimble plot for the selected run", null, Arrays.asList( ToolParameterDescriptor.create("replaceOriginal", "Replace Original File", "If selected, the input BAM will be deleted and the database record will be switched to use this filepath.", "checkbox", new JSONObject(){{ put("checked", true); }}, true), From 2f05093fe403a038e63a9e1c13c0cd3dae3bfd7f Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 19 Nov 2024 12:20:27 -0800 Subject: [PATCH 33/37] Correct name of RepeatNimbleReportHandler --- .../org/labkey/singlecell/run/RepeatNimbleReportHandler.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java index 0b1b1c0fc..3985ec92d 100644 --- a/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java +++ b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java @@ -41,10 +41,7 @@ public class RepeatNimbleReportHandler extends AbstractParameterizedOutputHandle { public RepeatNimbleReportHandler() { - super(ModuleLoader.getInstance().getModule(SingleCellModule.class), "Re-run Nimble Report", "This will re-run nimble report and nimble plot for the selected run", null, Arrays.asList( - ToolParameterDescriptor.create("replaceOriginal", "Replace Original File", "If selected, the input BAM will be deleted and the database record will be switched to use this filepath.", "checkbox", new JSONObject(){{ - put("checked", true); - }}, true), + super(ModuleLoader.getInstance().getModule(SingleCellModule.class), "Re-run Nimble Report", "This will re-run nimble report and nimble plot for the selected run and replace the original files in-place.", null, Arrays.asList( ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{ put("checked", true); }}, true) From edc37be9e0801f1c553d452da5dbb7ec781537c6 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 19 Nov 2024 12:23:45 -0800 Subject: [PATCH 34/37] Bugfix to location of alignResults --- .../labkey/singlecell/run/RepeatNimbleReportHandler.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java index 3985ec92d..ba5b5f1c8 100644 --- a/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java +++ b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java @@ -95,6 +95,11 @@ public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport } + private File getAlignmentResults(File reportResults) + { + return new File(reportResults.getParentFile(), reportResults.getName().replaceAll("reportResults", "alignResults") + ".gz"); + } + @Override public void processFilesRemote(List inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException { @@ -103,7 +108,7 @@ public void processFilesRemote(List inputFiles, JobContext c for (SequenceOutputFile so : inputFiles) { // This is the prior report results: - File alignmentFile = new File(so.getFile().getParentFile(), so.getFile().getName().replaceAll("reportResults", "alignResults")); + File alignmentFile = getAlignmentResults(so.getFile()); if (!alignmentFile.exists()) { throw new PipelineJobException("Unable to find file: " + alignmentFile.getPath()); From 2cfc078a4892b5e7be131207dd99058afae2eac8 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 19 Nov 2024 13:22:03 -0800 Subject: [PATCH 35/37] Ensure nimble alignment file copied locally for docker --- .../src/org/labkey/singlecell/run/NimbleHelper.java | 11 ++++++++--- .../singlecell/run/RepeatNimbleReportHandler.java | 3 ++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index fe3431739..3c2268aa4 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -670,18 +670,23 @@ private static boolean runUsingDocker(List nimbleArgs, PipelineStepOutpu } private File ensureLocalCopy(File input, PipelineStepOutput output) throws PipelineJobException + { + return ensureLocalCopy(input, output, getPipelineCtx()); + } + + public static File ensureLocalCopy(File input, PipelineStepOutput output, PipelineContext ctx) throws PipelineJobException { try { - if (getPipelineCtx().getWorkingDirectory().equals(input.getParentFile())) + if (ctx.getWorkingDirectory().equals(input.getParentFile())) { return input; } - File local = new File(getPipelineCtx().getWorkingDirectory(), input.getName()); + File local = new File(ctx.getWorkingDirectory(), input.getName()); if (!local.exists()) { - getPipelineCtx().getLogger().debug("Copying file locally: " + input.getPath()); + ctx.getLogger().debug("Copying file locally: " + input.getPath()); FileUtils.copyFile(input, local); } diff --git a/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java index ba5b5f1c8..40ba84d3a 100644 --- a/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java +++ b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java @@ -115,7 +115,8 @@ public void processFilesRemote(List inputFiles, JobContext c } // This will update these files in-place: - File reportFile = NimbleHelper.runNimbleReport(alignmentFile, so.getLibrary_id(), output, ctx); + File alignmentFileLocal = NimbleHelper.ensureLocalCopy(alignmentFile); + File reportFile = NimbleHelper.runNimbleReport(alignmentFileLocal, so.getLibrary_id(), output, ctx); if (!reportFile.exists()) { throw new PipelineJobException("Unable to find file: " + reportFile.getPath()); From c2245926c48edb4186ed9dcd17ccc7c3bbb0733e Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 19 Nov 2024 13:22:26 -0800 Subject: [PATCH 36/37] Ensure nimble alignment file copied locally for docker --- .../org/labkey/singlecell/run/RepeatNimbleReportHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java index 40ba84d3a..166ca2a51 100644 --- a/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java +++ b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java @@ -115,7 +115,7 @@ public void processFilesRemote(List inputFiles, JobContext c } // This will update these files in-place: - File alignmentFileLocal = NimbleHelper.ensureLocalCopy(alignmentFile); + File alignmentFileLocal = NimbleHelper.ensureLocalCopy(alignmentFile, output, ctx); File reportFile = NimbleHelper.runNimbleReport(alignmentFileLocal, so.getLibrary_id(), output, ctx); if (!reportFile.exists()) { From d59d5e79aad6673c9b7063e8ecd4295de49fb860 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 19 Nov 2024 16:04:02 -0800 Subject: [PATCH 37/37] Copy nimble report when completed --- .../run/RepeatNimbleReportHandler.java | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java index 166ca2a51..9228cd71e 100644 --- a/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java +++ b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java @@ -1,9 +1,8 @@ package org.labkey.singlecell.run; -import org.apache.commons.text.similarity.SimilarityScoreFrom; +import org.apache.commons.io.FileUtils; import org.json.JSONObject; import org.labkey.api.collections.CaseInsensitiveHashMap; -import org.labkey.api.data.ContainerType; import org.labkey.api.data.SimpleFilter; import org.labkey.api.data.TableInfo; import org.labkey.api.data.TableSelector; @@ -31,6 +30,7 @@ import org.labkey.singlecell.SingleCellSchema; import java.io.File; +import java.io.IOException; import java.sql.SQLException; import java.util.Arrays; import java.util.Collections; @@ -127,6 +127,32 @@ public void processFilesRemote(List inputFiles, JobContext c { throw new PipelineJobException("Unable to find file: " + htmlFile.getPath()); } + + // Replace the originals: + try + { + File targetHtml = new File(so.getFile().getParentFile(), htmlFile.getName()); + if (targetHtml.exists()) + { + targetHtml.delete(); + } + FileUtils.moveFile(htmlFile, targetHtml); + + File targetReport = new File(so.getFile().getParentFile(), reportFile.getName()); + if (targetReport.exists()) + { + targetReport.delete(); + } + else + { + ctx.getLogger().error("Expected report file to exist: " + targetReport.getPath()); + } + FileUtils.moveFile(reportFile, targetReport); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } } ctx.getFileManager().addIntermediateFiles(output.getIntermediateFiles());