From a09715da1c343da0d2daf8618198203f2fb58fe2 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 20 Nov 2024 20:50:04 -0800 Subject: [PATCH 01/53] Improve paragraph logging --- .../labkey/sequenceanalysis/run/alignment/ParagraphStep.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index fa929a0ad..042a7d586 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -223,6 +223,8 @@ else if (!svVcf.exists()) paragraphArgs.add("-m"); paragraphArgs.add("/work/" + coverageFile.getName()); + paragraphArgs.add("--verbose"); + paragraphArgs.add("-r"); File genomeFasta = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile(); dockerWrapper.ensureLocalCopy(genomeFasta, ctx.getWorkingDirectory(), ctx.getFileManager()); From 74ee60ad622de749b702b96d033f3a3485644b25 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 21 Nov 2024 06:44:19 -0800 Subject: [PATCH 02/53] Improve paragraph logging --- .../run/alignment/ParagraphStep.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index 042a7d586..e21ae1217 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -44,11 +44,14 @@ public ParagraphStep() {{ put("allowBlank", false); }}, null), - ToolParameterDescriptor.create("doBndSubset", "Remove BNDs", "If the reference VCF contains BNDs, selecting this option will cause the job to remove them prior to paragraph", "checkbox", new 
JSONObject(){{ + ToolParameterDescriptor.create("doBndSubset", "Filter Input VCF", "If selected, prior to running SelectVariants will be run to remove BNDs sites with POS<150 and symbolic INS without ALT sequence", "checkbox", new JSONObject(){{ put("checked", false); }}, false), ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{ put("checked", false); + }}, false), + ToolParameterDescriptor.create("debug", "Debug Logging", "If checked, --debug will be passed to paragraph to increase logging", "checkbox", new JSONObject(){{ + put("checked", false); }}, false) )); } @@ -223,7 +226,14 @@ else if (!svVcf.exists()) paragraphArgs.add("-m"); paragraphArgs.add("/work/" + coverageFile.getName()); - paragraphArgs.add("--verbose"); + if (ctx.getParams().optBoolean("debug", false)) + { + paragraphArgs.add("--debug"); + } + else + { + paragraphArgs.add("--verbose"); + } paragraphArgs.add("-r"); File genomeFasta = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile(); From 0891dd90cff45d50477a3a36974c62b210e964d0 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 21 Nov 2024 08:49:46 -0800 Subject: [PATCH 03/53] Support bcftools fill-from-fasta --- .../SequenceAnalysisModule.java | 2 + .../analysis/BcftoolsFillFromFastaStep.java | 116 ++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BcftoolsFillFromFastaStep.java diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index 338bf6939..61c9a0fe1 100644 --- 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -104,6 +104,7 @@ import org.labkey.sequenceanalysis.run.alignment.StarWrapper; import org.labkey.sequenceanalysis.run.alignment.VulcanWrapper; import org.labkey.sequenceanalysis.run.analysis.BamIterator; +import org.labkey.sequenceanalysis.run.analysis.BcftoolsFillFromFastaStep; import org.labkey.sequenceanalysis.run.analysis.BcftoolsFillTagsStep; import org.labkey.sequenceanalysis.run.analysis.BcftoolsFixploidyStep; import org.labkey.sequenceanalysis.run.analysis.DeepVariantAnalysis; @@ -365,6 +366,7 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new SummarizeGenotypeQualityStep.Provider()); SequencePipelineService.get().registerPipelineStep(new BcftoolsFillTagsStep.Provider()); SequencePipelineService.get().registerPipelineStep(new BcftoolsFixploidyStep.Provider()); + SequencePipelineService.get().registerPipelineStep(new BcftoolsFillFromFastaStep.Provider()); SequencePipelineService.get().registerPipelineStep(new SVAnnotateStep.Provider()); //handlers diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BcftoolsFillFromFastaStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BcftoolsFillFromFastaStep.java new file mode 100644 index 000000000..2068f56d6 --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BcftoolsFillFromFastaStep.java @@ -0,0 +1,116 @@ +package org.labkey.sequenceanalysis.run.analysis; + +import htsjdk.samtools.util.Interval; +import org.apache.commons.lang3.StringUtils; +import org.jetbrains.annotations.Nullable; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.sequenceanalysis.SequenceAnalysisService; +import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider; +import 
org.labkey.api.sequenceanalysis.pipeline.BcftoolsRunner; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep; +import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl; +import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; +import org.labkey.sequenceanalysis.pipeline.SequenceTaskHelper; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +public class BcftoolsFillFromFastaStep extends AbstractCommandPipelineStep implements VariantProcessingStep +{ + public BcftoolsFillFromFastaStep(PipelineStepProvider provider, PipelineContext ctx) + { + super(provider, ctx, new BcftoolsRunner(ctx.getLogger())); + } + + public static class Provider extends AbstractVariantProcessingStepProvider implements SupportsScatterGather + { + public Provider() + { + super("BcftoolsFillFromFastaStep", "Bcftools Fill-From-FASTA", "bcftools", "This will replace REF alleles listed as N with the sequence from the FASTA file.", Arrays.asList( + + ), null, null); + } + + @Override + public BcftoolsFillFromFastaStep create(PipelineContext ctx) + { + return new BcftoolsFillFromFastaStep(this, ctx); + } + } + + @Override + public Output processVariants(File inputVCF, File outputDirectory, ReferenceGenome genome, @Nullable List intervals) throws PipelineJobException + { + VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl(); + + List options = new ArrayList<>(); + options.add(BcftoolsRunner.getBcfToolsPath().getPath()); + options.add("+fill-from-fasta"); + + options.add(inputVCF.getPath()); + + if (intervals != null) + { + 
options.add("--regions"); + options.add(intervals.stream().map(interval -> interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd()).collect(Collectors.joining(","))); + } + + options.add("-O"); + options.add("z9"); + + Integer threads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger()); + if (threads != null) + { + options.add("--threads"); + options.add(threads.toString()); + } + + File outputVcf = new File(outputDirectory, SequenceTaskHelper.getUnzippedBaseName(inputVCF) + ".fill.vcf.gz"); + options.add("-o"); + options.add(outputVcf.getPath()); + + options.add("--"); + + options.add("-"); + options.add(genome.getWorkingFastaFile().getPath()); + + options.add("-c"); + options.add("REF"); + + BcftoolsRunner wrapper = getWrapper(); + + String bcfPluginDir = StringUtils.trimToNull(System.getenv("BCFTOOLS_PLUGINS")); + if (bcfPluginDir != null) + { + getPipelineCtx().getLogger().debug("Setting BCFTOOLS_PLUGINS environment variable: " + bcfPluginDir); + wrapper.addToEnvironment("BCFTOOLS_PLUGINS", bcfPluginDir); + } + + wrapper.execute(options); + if (!outputVcf.exists()) + { + throw new PipelineJobException("output not found: " + outputVcf); + } + + try + { + SequenceAnalysisService.get().ensureVcfIndex(outputVcf, getWrapper().getLogger()); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + output.setVcf(outputVcf); + + return output; + } +} From 60c109db6b624152aaeb19177721ce6d9d8f4d63 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 21 Nov 2024 10:27:31 -0800 Subject: [PATCH 04/53] Improve error handling in paragraph --- .../run/alignment/ParagraphStep.java | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index e21ae1217..c0d749fd2 100644 --- 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -52,6 +52,9 @@ public ParagraphStep() }}, false), ToolParameterDescriptor.create("debug", "Debug Logging", "If checked, --debug will be passed to paragraph to increase logging", "checkbox", new JSONObject(){{ put("checked", false); + }}, false), + ToolParameterDescriptor.create("retrieveReferenceSeq", "Retrieve Reference Sequence", "If checked, --retrieve-reference-sequence will be passed to paragraph", "checkbox", new JSONObject(){{ + put("checked", false); }}, false) )); } @@ -218,6 +221,24 @@ else if (!svVcf.exists()) paragraphArgs.add("-o"); paragraphArgs.add("/work/" + paragraphOutDir.getName()); + File scratchDir = new File(ctx.getOutputDir(), "pgScratch"); + if (scratchDir.exists()) + { + try + { + FileUtils.deleteDirectory(scratchDir); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + paragraphArgs.add("--scratch-dir"); + paragraphArgs.add(scratchDir.getPath()); + + ctx.getFileManager().addIntermediateFile(scratchDir); + paragraphArgs.add("-i"); dockerWrapper.ensureLocalCopy(svVcf, ctx.getWorkingDirectory(), ctx.getFileManager()); dockerWrapper.ensureLocalCopy(new File(svVcf.getPath() + ".tbi"), ctx.getWorkingDirectory(), ctx.getFileManager()); @@ -241,9 +262,10 @@ else if (!svVcf.exists()) dockerWrapper.ensureLocalCopy(new File(genomeFasta.getPath() + ".fai"), ctx.getWorkingDirectory(), ctx.getFileManager()); paragraphArgs.add("/work/" + genomeFasta.getName()); - paragraphArgs.add("--scratch-dir"); - paragraphArgs.add("/tmp"); - dockerWrapper.setTmpDir(new File(SequencePipelineService.get().getJavaTempDir())); + if (ctx.getParams().optBoolean("retrieveReferenceSeq", false)) + { + paragraphArgs.add("--retrieve-reference-sequence"); + } if (threads != null) { From cb7d5e1fa4db1be2ceaa69912198208835ddd0e1 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 22 
Nov 2024 11:15:09 -0800 Subject: [PATCH 05/53] Update CellRangerVDJWrapper to support CR9 --- .../singlecell/run/CellRangerVDJWrapper.java | 34 ++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java index 36525672c..17972552c 100644 --- a/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java +++ b/singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java @@ -3,6 +3,7 @@ import au.com.bytecode.opencsv.CSVReader; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.stream.IntStreams; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.Nullable; import org.json.JSONObject; @@ -61,6 +62,7 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.IntStream; public class CellRangerVDJWrapper extends AbstractCommandWrapper { @@ -862,6 +864,7 @@ private static void processCSV(PrintWriter writer, boolean printHeader, File inp Map chimericCallsRecovered = new HashMap<>(); int restoredTRDVAV = 0; + final Map headerIdx = new HashMap<>(); int lineIdx = 0; while ((line = reader.readLine()) != null) { @@ -873,6 +876,8 @@ private static void processCSV(PrintWriter writer, boolean printHeader, File inp writer.println(line + ",chain_type"); } + String[] header = line.split(","); + IntStream.range(0, header.length).forEach(idx -> headerIdx.put(header[idx], idx)); continue; } @@ -880,18 +885,23 @@ private static void processCSV(PrintWriter writer, boolean printHeader, File inp String[] tokens = line.split(",", -1); // -1 used to preserve trailing empty strings // Restore original value for TRD/TRA - if (tokens[6].contains("TRDV") && tokens[6].contains("/") && tokens[6].contains("AV")) + final int vGeneIdx = headerIdx.get("v_gene"); + 
final int jGeneIdx = headerIdx.get("j_gene"); + final int cGeneIdx = headerIdx.get("c_gene"); + final int chainIdx = headerIdx.get("chain"); + + if (tokens[vGeneIdx].contains("TRDV") && tokens[vGeneIdx].contains("/") && tokens[vGeneIdx].contains("AV")) { restoredTRDVAV++; - String[] split = tokens[6].split("/"); - tokens[6] = "TR" + split[1] + "/" + split[0].replaceAll("TR", ""); + String[] split = tokens[vGeneIdx].split("/"); + tokens[vGeneIdx] = "TR" + split[1] + "/" + split[0].replaceAll("TR", ""); } List chains = new ArrayList<>(); String vGeneChain = null; String jGeneChain = null; String cGeneChain = null; - for (int idx : new Integer[]{6,8,9}) + for (int idx : new Integer[]{vGeneIdx,jGeneIdx,cGeneIdx}) { String val = StringUtils.trimToNull(tokens[idx]); if (val != null) @@ -899,15 +909,15 @@ private static void processCSV(PrintWriter writer, boolean printHeader, File inp val = val.substring(0, 3); chains.add(val); - if (idx == 6) + if (idx == vGeneIdx) { vGeneChain = val; } - if (idx == 8) + if (idx == jGeneIdx) { jGeneChain = val; } - else if (idx == 9) + else if (idx == cGeneIdx) { cGeneChain = val; } @@ -915,7 +925,7 @@ else if (idx == 9) } Set uniqueChains = new HashSet<>(chains); - String originalChain = StringUtils.trimToNull(tokens[5]); + String originalChain = StringUtils.trimToNull(tokens[chainIdx]); // Recover TRDV/TRAJ/TRAC: if (uniqueChains.size() > 1) @@ -925,7 +935,7 @@ else if (idx == 9) { uniqueChains.clear(); uniqueChains.add(cGeneChain); - String key = originalChain + "->" + cGeneChain + " (based on C-GENE)"; + String key = vGeneChain + ":" + jGeneChain + ":" + originalChain + "->" + cGeneChain + " (based on C-GENE)"; chimericCallsRecovered.put(key, chimericCallsRecovered.getOrDefault(key, 0) + 1); } else if (uniqueChains.size() == 2) @@ -950,14 +960,14 @@ else if (uniqueChains.size() == 2) if (uniqueChains.size() == 1) { String chain = uniqueChains.iterator().next(); - tokens[5] = chain; + tokens[chainIdx] = chain; } else { - 
log.info("Multiple chains detected [" + StringUtils.join(chains, ",")+ "], leaving original call alone: " + originalChain + ". " + tokens[6] + "/" + tokens[8] + "/" + tokens[9]); + log.info("Multiple chains detected [" + StringUtils.join(chains, ",")+ "], leaving original call alone: " + originalChain + ". " + tokens[vGeneIdx] + "/" + tokens[jGeneIdx] + "/" + tokens[cGeneIdx]); } - if (acceptableChains.contains(tokens[5])) + if (acceptableChains.contains(tokens[chainIdx])) { writer.println(StringUtils.join(tokens, ",") + "," + chainType); } From caaeada19c47d901906e252f12ad942df4e712d7 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 26 Nov 2024 09:08:11 -0800 Subject: [PATCH 06/53] Drop sudo from docker --- .../org/labkey/api/sequenceanalysis/run/DockerWrapper.java | 4 ++-- .../org/labkey/sequenceanalysis/analysis/GLNexusHandler.java | 4 ++-- .../sequenceanalysis/run/analysis/DeepVariantAnalysis.java | 4 ++-- .../sequenceanalysis/run/analysis/NextCladeHandler.java | 4 ++-- .../labkey/sequenceanalysis/run/analysis/PangolinHandler.java | 4 ++-- .../singlecell/pipeline/AbstractSingleCellPipelineStep.java | 4 ++-- .../src/org/labkey/singlecell/CellHashingServiceImpl.java | 4 ++-- singlecell/src/org/labkey/singlecell/run/NimbleHelper.java | 4 ++-- 8 files changed, 16 insertions(+), 16 deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index 12b2158fa..a923643c3 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -48,8 +48,8 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline writer.println("WD=`pwd`"); writer.println("HOME=`echo ~/`"); writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("sudo $DOCKER pull " + _containerName); - 
writer.println("sudo $DOCKER run --rm=true \\"); + writer.println("$DOCKER pull " + _containerName); + writer.println("$DOCKER run --rm=true \\"); writer.println("\t-v \"${WD}:/work\" \\"); writer.println("\t-v \"${HOME}:/homeDir\" \\"); _ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java index 61ce01cd2..9afe468b6 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java @@ -287,8 +287,8 @@ public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker writer.println("WD=`pwd`"); writer.println("HOME=`echo ~/`"); writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("sudo $DOCKER pull ghcr.io/dnanexus-rnd/glnexus:" + binVersion); - writer.println("sudo $DOCKER run --rm=true \\"); + writer.println("$DOCKER pull ghcr.io/dnanexus-rnd/glnexus:" + binVersion); + writer.println("$DOCKER run --rm=true \\"); writer.println("\t-v \"${WD}:/work\" \\"); writer.println("\t-v \"${HOME}:/homeDir\" \\"); ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java index 0490ed063..f96c55232 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java @@ -266,8 +266,8 @@ public void execute(File inputBam, File refFasta, File outputGvcf, boolean retai writer.println("WD=`pwd`"); writer.println("HOME=`echo ~/`"); writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - 
writer.println("sudo $DOCKER pull google/deepvariant:" + binVersion); - writer.println("sudo $DOCKER run --rm=true \\"); + writer.println("$DOCKER pull google/deepvariant:" + binVersion); + writer.println("$DOCKER run --rm=true \\"); writer.println("\t-v \"${WD}:/work\" \\"); writer.println("\t-v \"${HOME}:/homeDir\" \\"); ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java index 6e5e4320d..0f4de0fab 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java @@ -168,8 +168,8 @@ public static File runNextClade(File consensusFasta, Logger log, PipelineOutputT writer.println("HOME=`echo ~/`"); writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("sudo $DOCKER pull nextstrain/nextclade:latest"); - writer.println("sudo $DOCKER run --rm=true \\"); + writer.println("$DOCKER pull nextstrain/nextclade:latest"); + writer.println("$DOCKER run --rm=true \\"); if (SequencePipelineService.get().getMaxThreads(log) != null) { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java index 56608dde2..346540b6b 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java @@ -291,8 +291,8 @@ private static File runUsingDocker(File outputDir, Logger log, File consensusFas writer.println("HOME=`echo ~/`"); writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("sudo $DOCKER pull ghcr.io/bimberlabinternal/pangolin:latest"); - 
writer.println("sudo $DOCKER run --rm=true \\"); + writer.println("$DOCKER pull ghcr.io/bimberlabinternal/pangolin:latest"); + writer.println("$DOCKER run --rm=true \\"); if (SequencePipelineService.get().getMaxThreads(log) != null) { diff --git a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java index e5412f2d3..ce7ac101c 100644 --- a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java +++ b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java @@ -320,8 +320,8 @@ public static void executeR(SequenceOutputHandler.JobContext ctx, String dockerC writer.println("HOME=`echo ~/`"); writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("sudo $DOCKER pull " + dockerContainerName); - writer.println("sudo $DOCKER run --rm=true \\"); + writer.println("$DOCKER pull " + dockerContainerName); + writer.println("$DOCKER run --rm=true \\"); Integer maxThreads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); if (maxThreads != null) diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index 6aab5bcdc..16a2fd6ba 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -1311,8 +1311,8 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St writer.println("HOME=`echo ~/`"); writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("sudo $DOCKER pull ghcr.io/bimberlab/cellhashr:latest"); - writer.println("sudo $DOCKER run --rm=true \\"); + writer.println("$DOCKER pull ghcr.io/bimberlab/cellhashr:latest"); + writer.println("$DOCKER run --rm=true \\"); if 
(SequencePipelineService.get().getMaxRam() != null) { writer.println("\t--memory=" + SequencePipelineService.get().getMaxRam() + "g \\"); diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index 3c2268aa4..e96e9bd46 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -594,8 +594,8 @@ private static boolean runUsingDocker(List nimbleArgs, PipelineStepOutpu writer.println("HOME=`echo ~/`"); writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("sudo $DOCKER pull " + DOCKER_CONTAINER_NAME); - writer.println("sudo $DOCKER run --rm=true \\"); + writer.println("$DOCKER pull " + DOCKER_CONTAINER_NAME); + writer.println("$DOCKER run --rm=true \\"); Integer maxRam = SequencePipelineService.get().getMaxRam(); if (maxRam != null) From a79c0ec9547e10e38afc45c3e178e663522746b9 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 26 Nov 2024 09:49:40 -0800 Subject: [PATCH 07/53] Improve slurm sacct parsing --- SequenceAnalysis/pipeline_code/extra_tools_install.sh | 1 + .../org/labkey/cluster/pipeline/SlurmExecutionEngine.java | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index a04deedb8..f7396d7f9 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -191,6 +191,7 @@ then cd ../ cp -R paragraph $LKTOOLS_DIR ln -s ${LKTOOLS_DIR}/paragraph/bin/paragraph ${LKTOOLS_DIR}/paragraph + ln -s ${LKTOOLS_DIR}/paragraph/bin/idxdepth ${LKTOOLS_DIR}/idxdepth ln -s ${LKTOOLS_DIR}/paragraph/bin/multigrmpy.py ${LKTOOLS_DIR}/multigrmpy.py else echo "Already installed" diff --git a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java 
b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java index 52e9ae06c..0bddaba86 100644 --- a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java +++ b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java @@ -299,13 +299,14 @@ else if (headerFound) } // NOTE: if the line has blank ending columns, trimmed lines might lack that value - if (maxRssIdx > -1 && maxRssIdx < tokens.length) + if ((job.getClusterId() + ".0").equals(id) && maxRssIdx > -1 && maxRssIdx < tokens.length) { try { - if (NumberUtils.isCreatable(tokens[maxRssIdx])) + String maxRSS = StringUtils.trimToNull(tokens[maxRssIdx]); + if (maxRSS != null) { - long bytes = FileSizeFormatter.convertStringRepresentationToBytes(tokens[maxRssIdx]); + long bytes = FileSizeFormatter.convertStringRepresentationToBytes(maxRSS); long requestInBytes = FileSizeFormatter.convertStringRepresentationToBytes(getConfig().getRequestMemory() + "G"); //request is always GB if (bytes > requestInBytes) { From 9c0f8c2e7e1c300f7d30481ac3e15a2e48bc48a4 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 27 Nov 2024 13:47:39 -0800 Subject: [PATCH 08/53] Fix BWA-Mem2 inheritance --- .../sequenceanalysis/run/alignment/BWAMem2Wrapper.java | 8 +------- .../sequenceanalysis/run/alignment/BWAMemWrapper.java | 7 ++++++- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java index e57fdf757..d7dc80fd1 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java @@ -37,18 +37,12 @@ public BWAMem2Wrapper(@Nullable Logger logger) super(logger); } - public static class BWAMem2AlignmentStep extends BWAAlignmentStep + public static class BWAMem2AlignmentStep extends BWAMemAlignmentStep { public 
BWAMem2AlignmentStep(AlignmentStepProvider provider, PipelineContext ctx) { super(provider, ctx, new BWAMem2Wrapper(ctx.getLogger())); } - - @Override - public boolean doAddReadGroups() - { - return false; - } } public static class Provider extends AbstractAlignmentStepProvider diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java index 883b5b6fb..68599ecdd 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java @@ -38,9 +38,14 @@ public BWAMemWrapper(@Nullable Logger logger) public static class BWAMemAlignmentStep extends BWAAlignmentStep { + public BWAMemAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx, BWAMemWrapper wrapper) + { + super(provider, ctx, wrapper); + } + public BWAMemAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx) { - super(provider, ctx, new BWAMemWrapper(ctx.getLogger())); + this(provider, ctx, new BWAMemWrapper(ctx.getLogger())); } @Override From c33a6988b168e69161ca0f2a2e11373ed28d2ad8 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 29 Nov 2024 12:41:55 -0800 Subject: [PATCH 09/53] Update slurm memory logging --- .../cluster/pipeline/SlurmExecutionEngine.java | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java index 0bddaba86..ac64f2a2b 100644 --- a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java +++ b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java @@ -14,8 +14,12 @@ import org.labkey.api.data.ContainerManager; import org.labkey.api.pipeline.PipelineJob; import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.pipeline.PipelineJobService; +import 
org.labkey.api.pipeline.PipelineService; +import org.labkey.api.pipeline.PipelineStatusFile; import org.labkey.api.util.FileUtil; import org.labkey.api.util.Pair; +import org.labkey.api.writer.PrintWriters; import org.labkey.cluster.ClusterManager; import org.labkey.cluster.ClusterServiceImpl; import org.quartz.JobExecutionException; @@ -23,6 +27,7 @@ import java.io.File; import java.io.FileWriter; import java.io.IOException; +import java.io.PrintWriter; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -311,6 +316,15 @@ else if (headerFound) if (bytes > requestInBytes) { info = "Job exceeded memory, max was: " + FileSizeFormatter.convertBytesToUnit(bytes, 'G') + "G"; + + PipelineStatusFile sf = PipelineService.get().getStatusFile(job.getJobId()); + if (sf != null) + { + try (PrintWriter writer = PrintWriters.getPrintWriter(new File(sf.getFilePath()))) + { + writer.println(info + ". Raw slurm value: " + maxRSS); + } + } } } } From 5f02031e7c5b1ba0945cf25ef2f0c4586b97ed76 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 29 Nov 2024 12:44:12 -0800 Subject: [PATCH 10/53] Update slurm memory logging --- .../src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java index ac64f2a2b..dd6c3358f 100644 --- a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java +++ b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java @@ -315,7 +315,7 @@ else if (headerFound) long requestInBytes = FileSizeFormatter.convertStringRepresentationToBytes(getConfig().getRequestMemory() + "G"); //request is always GB if (bytes > requestInBytes) { - info = "Job exceeded memory, max was: " + FileSizeFormatter.convertBytesToUnit(bytes, 'G') + "G"; + info = "Job exceeded memory, max was: " + FileSizeFormatter.convertBytesToUnit(bytes, 'G') + "G, 
requested memory was: " + getConfig().getRequestMemory() + "G"; PipelineStatusFile sf = PipelineService.get().getStatusFile(job.getJobId()); if (sf != null) From b1289f47843980837bcb60fe125a2d46d8ae7e3d Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 29 Nov 2024 12:56:30 -0800 Subject: [PATCH 11/53] Handle FileNotFoundException --- .../org/labkey/cluster/pipeline/SlurmExecutionEngine.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java index dd6c3358f..bbc465ff9 100644 --- a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java +++ b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java @@ -25,6 +25,7 @@ import org.quartz.JobExecutionException; import java.io.File; +import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; @@ -324,6 +325,10 @@ else if (headerFound) { writer.println(info + ". 
Raw slurm value: " + maxRSS); } + catch (FileNotFoundException e) + { + _log.error("Unable to find log file for job, " + job.getJobId() + ": " + sf.getFilePath()); + } } } } From 55cc6f4b5bb1ffec8f39babad99b27e472c46c6c Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 29 Nov 2024 15:33:22 -0800 Subject: [PATCH 12/53] Attempt to remove copyJars() from SequenceAnalysis gradle build --- SequenceAnalysis/build.gradle | 18 +----------------- .../run/util/FastqcRunner.java | 10 ++-------- 2 files changed, 3 insertions(+), 25 deletions(-) diff --git a/SequenceAnalysis/build.gradle b/SequenceAnalysis/build.gradle index f085ebe5c..ead7f308a 100644 --- a/SequenceAnalysis/build.gradle +++ b/SequenceAnalysis/build.gradle @@ -200,20 +200,4 @@ if (project.findProject(BuildUtils.getTestProjectPath(project.gradle)) != null & } } } -} - -project.tasks.register("copyJars", Copy) - { CopySpec copy -> - copy.group = "Build" - copy.description = "Copy commons-math3 JAR to module's lib directory" - - copy.setDuplicatesStrategy(DuplicatesStrategy.EXCLUDE) - copy.from(project.configurations.external) - copy.into new File("${project.labkey.explodedModuleLibDir}") - copy.include { - "**commons-math3-**.jar" - } - } - -project.tasks.named('module').configure { dependsOn(project.tasks.copyJars) } -project.tasks.named('copyJars').configure { mustRunAfter(project.tasks.populateExplodedLib) } +} \ No newline at end of file diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java index f30999c92..8252c7876 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java @@ -374,7 +374,8 @@ private List getBaseParams() throws FileNotFoundException throw new RuntimeException("Not found: " + htsjdkJar.getPath()); } - File commonsMath = new File(libDir, "commons-math3-3.6.1.jar"); + File 
apiLibDir = new File(ModuleLoader.getInstance().getModule("api").getExplodedPath(), "lib"); + File commonsMath = new File(apiLibDir, "commons-math3-3.6.1.jar"); if (!commonsMath.exists()) { throw new RuntimeException("Not found: " + commonsMath.getPath()); @@ -386,13 +387,6 @@ private List getBaseParams() throws FileNotFoundException throw new RuntimeException("Not found: " + jhdf5.getPath()); } - // NOTE: FastQC expects an alternate package name within this JAR, so use their packaged code instead: -// File base64 = new File(libDir, "base64-2.3.8.jar"); -// if (!base64.exists()) -// { -// throw new RuntimeException("Not found: " + base64.getPath()); -// } - List classPath = new ArrayList<>(); classPath.add("."); classPath.add(fastqcDir.getPath()); From 4a66ff314dd2f88161e4f8fcd5b1f33b7492c04f Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 30 Nov 2024 06:27:38 -0800 Subject: [PATCH 13/53] Update ParagraphStep parameters --- .../pipeline_code/extra_tools_install.sh | 2 ++ .../run/alignment/ParagraphStep.java | 31 +++++++++++++------ 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index f7396d7f9..c905586b2 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -188,6 +188,8 @@ then unzip paragraph-v2.4a-binary.zip rm paragraph-v2.4a-binary.zip + python3 -m pip install pysam intervaltree + cd ../ cp -R paragraph $LKTOOLS_DIR ln -s ${LKTOOLS_DIR}/paragraph/bin/paragraph ${LKTOOLS_DIR}/paragraph diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index c0d749fd2..f85476a80 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -50,7 +50,7 @@ public ParagraphStep() ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{ put("checked", false); }}, false), - ToolParameterDescriptor.create("debug", "Debug Logging", "If checked, --debug will be passed to paragraph to increase logging", "checkbox", new JSONObject(){{ + ToolParameterDescriptor.create("verbose", "Verbose Logging", "If checked, --verbose will be passed to paragraph to increase logging", "checkbox", new JSONObject(){{ put("checked", false); }}, false), ToolParameterDescriptor.create("retrieveReferenceSeq", "Retrieve Reference Sequence", "If checked, --debug will be passed to paragraph to increase logging", "checkbox", new JSONObject(){{ @@ -164,7 +164,24 @@ else if (!svVcf.exists()) depthArgs.add(threads.toString()); } - new SimpleScriptWrapper(ctx.getLogger()).execute(depthArgs); + File doneFile = new File(ctx.getWorkingDirectory(), "idxdepth.done"); + ctx.getFileManager().addIntermediateFile(doneFile); + if (doneFile.exists()) + { + ctx.getLogger().info("idxdepth already performed, skipping"); + } + else + { + new SimpleScriptWrapper(ctx.getLogger()).execute(depthArgs); + try + { + FileUtils.touch(doneFile); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } if (!coverageJson.exists()) { @@ -174,7 +191,7 @@ else if (!svVcf.exists()) // Should produce a simple text file: // id path depth read length - // TNPRC-IB18 ../IB18.cram 29.77 150 + // IB18 ../IB18.cram 29.77 150 File coverageFile = new File(ctx.getWorkingDirectory(), "coverage.txt"); String rgId = null; try (PrintWriter writer 
= PrintWriters.getPrintWriter(coverageFile); SamReader reader = SamReaderFactory.makeDefault().open(so.getFile())) @@ -235,7 +252,7 @@ else if (!svVcf.exists()) } paragraphArgs.add("--scratch-dir"); - paragraphArgs.add(scratchDir.getPath()); + paragraphArgs.add("/work/" + scratchDir.getName()); ctx.getFileManager().addIntermediateFile(scratchDir); @@ -247,11 +264,7 @@ else if (!svVcf.exists()) paragraphArgs.add("-m"); paragraphArgs.add("/work/" + coverageFile.getName()); - if (ctx.getParams().optBoolean("debug", false)) - { - paragraphArgs.add("--debug"); - } - else + if (ctx.getParams().optBoolean("verbose", false)) { paragraphArgs.add("--verbose"); } From 5049933f560a0f70a76601dd140f55248a0a6fd4 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 30 Nov 2024 07:14:41 -0800 Subject: [PATCH 14/53] Set paragraph temp dir --- .../org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java | 1 + 1 file changed, 1 insertion(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index f85476a80..90c4f5c4d 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -228,6 +228,7 @@ else if (!svVcf.exists()) ctx.getFileManager().addIntermediateFile(coverageFile); DockerWrapper dockerWrapper = new DockerWrapper("ghcr.io/bimberlabinternal/paragraph:latest", ctx.getLogger(), ctx); + dockerWrapper.setTmpDir(new File(SequencePipelineService.get().getJavaTempDir())); List paragraphArgs = new ArrayList<>(); paragraphArgs.add("/opt/paragraph/bin/multigrmpy.py"); From bf2cb04a2dba2494310e5d08630c4c3687a803d1 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 30 Nov 2024 08:07:58 -0800 Subject: [PATCH 15/53] Drop UID from docker --- .../org/labkey/sequenceanalysis/analysis/GLNexusHandler.java | 2 -- 
.../sequenceanalysis/run/analysis/DeepVariantAnalysis.java | 2 -- .../labkey/sequenceanalysis/run/analysis/NextCladeHandler.java | 2 -- .../labkey/sequenceanalysis/run/analysis/PangolinHandler.java | 2 -- .../api/singlecell/pipeline/AbstractSingleCellPipelineStep.java | 2 -- .../src/org/labkey/singlecell/CellHashingServiceImpl.java | 2 -- singlecell/src/org/labkey/singlecell/run/NimbleHelper.java | 2 -- 7 files changed, 14 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java index 9afe468b6..accfb9a8d 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java @@ -297,8 +297,6 @@ public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker { writer.println("\t-v \"${TMPDIR}:/tmp\" \\"); } - writer.println("\t-u $UID \\"); - writer.println("\t-e USERID=$UID \\"); Integer maxRam = SequencePipelineService.get().getMaxRam(); if (maxRam != null) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java index f96c55232..9cc33e928 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java @@ -275,8 +275,6 @@ public void execute(File inputBam, File refFasta, File outputGvcf, boolean retai { writer.println("\t-v \"${TMPDIR}:/tmp\" \\"); } - writer.println("\t-u $UID \\"); - writer.println("\t-e USERID=$UID \\"); writer.println("\t--entrypoint /bin/bash \\"); writer.println("\t-w /work \\"); Integer maxRam = SequencePipelineService.get().getMaxRam(); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java index 0f4de0fab..0e815fe48 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java @@ -185,8 +185,6 @@ public static File runNextClade(File consensusFasta, Logger log, PipelineOutputT writer.println("\t-v \"${WD}:/work\" \\"); ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); - writer.println("\t-u $UID \\"); - writer.println("\t-e USERID=$UID \\"); writer.println("\t-w /work \\"); writer.println("\tnextstrain/nextclade:latest \\"); writer.println("\t/bin/bash -c \"nextclade dataset get --name='sars-cov-2' --output-dir='/work/data/sars-cov-2';nextclade run --input-dataset='/work/data/sars-cov-2' --output-json '/work/" + jsonFile.getName() + "' '" + consensusFasta.getName() + "'\" && rm -Rf /work/data"); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java index 346540b6b..c79811449 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java @@ -309,8 +309,6 @@ private static File runUsingDocker(File outputDir, Logger log, File consensusFas String extraArgString = extraArgs == null ? 
"" : " " + StringUtils.join(extraArgs, " "); writer.println("\t-v \"${WD}:/work\" \\"); ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); - writer.println("\t-u $UID \\"); - writer.println("\t-e USERID=$UID \\"); writer.println("\t-w /work \\"); writer.println("\tghcr.io/bimberlabinternal/pangolin:latest \\"); writer.println("\tpangolin" + extraArgString + " '/work/" + consensusFasta.getName() + "'"); diff --git a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java index ce7ac101c..4c0638d92 100644 --- a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java +++ b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java @@ -352,8 +352,6 @@ public static void executeR(SequenceOutputHandler.JobContext ctx, String dockerC writer.println("\t-v \"" + tmpDir.getPath() + ":/tmp\" \\"); ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); writer.println("\t-v \"${HOME}:/homeDir\" \\"); - writer.println("\t-u $UID \\"); - writer.println("\t-e USERID=$UID \\"); writer.println("\t-e TMPDIR=/tmp \\"); if (dockerHomeDir != null) { diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index 16a2fd6ba..60d739fd0 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -1328,8 +1328,6 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St writer.println("\t-v \"${WD}:/work\" \\"); ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); writer.println("\t-v \"${HOME}:/homeDir\" \\"); - writer.println("\t-u $UID \\"); - writer.println("\t-e USERID=$UID \\"); writer.println("\t-w /work \\"); 
writer.println("\tghcr.io/bimberlab/cellhashr:latest \\"); writer.println("\tRscript --vanilla " + localRScript.getName()); diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index e96e9bd46..424147d47 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -608,10 +608,8 @@ private static boolean runUsingDocker(List nimbleArgs, PipelineStepOutpu ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); writer.println("\t-v \"${WD}:/work\" \\"); writer.println("\t-v \"${HOME}:/homeDir\" \\"); - writer.println("\t-u $UID \\"); writer.println("\t-e RUST_BACKTRACE=1 \\"); writer.println("\t-e TMPDIR=/work/tmpDir \\"); - writer.println("\t-e USERID=$UID \\"); writer.println("\t--entrypoint /bin/bash \\"); writer.println("\t-w /work \\"); writer.println("\t" + DOCKER_CONTAINER_NAME + " \\"); From dfae19f81562190b93ed19d2c58669b569054d2d Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 30 Nov 2024 17:12:09 -0800 Subject: [PATCH 16/53] Refactor nimble to use DockerWrapper --- .../run/AbstractCommandWrapper.java | 2 +- .../sequenceanalysis/run/DockerWrapper.java | 69 ++++++++-- .../run/alignment/ParagraphStep.java | 19 +-- .../labkey/singlecell/run/NimbleHelper.java | 130 +++--------------- 4 files changed, 81 insertions(+), 139 deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractCommandWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractCommandWrapper.java index 2e3e84e2c..971abf382 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractCommandWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractCommandWrapper.java @@ -48,7 +48,7 @@ abstract public class AbstractCommandWrapper implements CommandWrapper private boolean _warnNonZeroExits = true; private boolean 
_throwNonZeroExits = true; private Integer _lastReturnCode = null; - private final Map _environment = new HashMap<>(); + protected final Map _environment = new HashMap<>(); private final List _commandsExecuted = new ArrayList<>(); public AbstractCommandWrapper(@Nullable Logger logger) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index a923643c3..316c9ca9e 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -20,12 +20,15 @@ public class DockerWrapper extends AbstractCommandWrapper private final String _containerName; private final PipelineContext _ctx; private File _tmpDir = null; + private String _entryPoint = null; public DockerWrapper(String containerName, Logger log, PipelineContext ctx) { super(log); _containerName = containerName; _ctx = ctx; + + _environment.clear(); } public void setTmpDir(File tmpDir) @@ -33,6 +36,11 @@ public void setTmpDir(File tmpDir) _tmpDir = tmpDir; } + public void setEntryPoint(String entryPoint) + { + _entryPoint = entryPoint; + } + public void executeWithDocker(List containerArgs, File workDir, PipelineOutputTracker tracker) throws PipelineJobException { File localBashScript = new File(workDir, "docker.sh"); @@ -46,37 +54,72 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline writer.println("#!/bin/bash"); writer.println("set -x"); writer.println("WD=`pwd`"); - writer.println("HOME=`echo ~/`"); + writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); writer.println("$DOCKER pull " + _containerName); writer.println("$DOCKER run --rm=true \\"); - writer.println("\t-v \"${WD}:/work\" \\"); - writer.println("\t-v \"${HOME}:/homeDir\" \\"); + + // NOTE: getDockerVolumes() should be refactored to remove the -v and this logic 
should be updated accordingly: + File homeDir = new File(System.getProperty("user.home")); + if (homeDir.exists()) + { + final String searchString = "-v '" + homeDir.getPath() + "'"; + if (_ctx.getDockerVolumes().stream().noneMatch(searchString::startsWith)) + { + writer.println("\t-v \"" + homeDir.getPath() + ":/homeDir\" \\"); + } + else + { + _ctx.getLogger().debug("homeDir already present in docker volumes, omitting"); + } + + _environment.put("USER_HOME", homeDir.getPath()); + } + _ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); if (_tmpDir != null) { - writer.println("\t-v \"" + _tmpDir.getPath() + ":/tmp\" \\"); + // NOTE: getDockerVolumes() should be refactored to remove the -v and this logic should be updated accordingly: + final String searchString = "-v '" + _tmpDir.getPath() + "'"; + if (_ctx.getDockerVolumes().stream().noneMatch(searchString::startsWith)) + { + writer.println("\t-v \"" + _tmpDir.getPath() + ":/tmp\" \\"); + } + else + { + _ctx.getLogger().debug("tmpDir already present in docker volumes, omitting"); + } + } + + if (_entryPoint != null) + { + writer.println("\t--entrypoint \"" + _entryPoint + "\"\\"); } - writer.println("\t--entrypoint /bin/bash \\"); - writer.println("\t-w /work \\"); + + writer.println("\t-w " + workDir.getPath() + " \\"); Integer maxRam = SequencePipelineService.get().getMaxRam(); if (maxRam != null) { writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM=" + maxRam + " \\"); writer.println("\t--memory='" + maxRam + "g' \\"); } + + for (String key : _environment.keySet()) + { + writer.println("\t-e " + key + "=" + _environment.get(key) + " \\"); + } writer.println("\t" + _containerName + " \\"); - writer.println("\t/work/" + dockerBashScript.getName()); - writer.println("EXIT_CODE=$?"); - writer.println("echo 'Docker run exit code: '$EXIT_CODE"); - writer.println("exit $EXIT_CODE"); + writer.println("\t" + workDir.getPath() + "/" + dockerBashScript.getName()); + writer.println("DOCKER_EXIT_CODE=$?"); + 
writer.println("echo 'Docker run exit code: '$DOCKER_EXIT_CODE"); + writer.println("exit $DOCKER_EXIT_CODE"); dockerWriter.println("#!/bin/bash"); dockerWriter.println("set -x"); dockerWriter.println(StringUtils.join(containerArgs, " ")); - dockerWriter.println("EXIT_CODE=$?"); - dockerWriter.println("echo 'Exit code: '$?"); - dockerWriter.println("exit $EXIT_CODE"); + dockerWriter.println("BASH_EXIT_CODE=$?"); + dockerWriter.println("echo 'Bash exit code: '$BASH_EXIT_CODE"); + dockerWriter.println("exit $BASH_EXIT_CODE"); } catch (IOException e) { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index 90c4f5c4d..15700e13e 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -219,7 +219,7 @@ else if (!svVcf.exists()) } double readLength = json.getInt("read_length"); - writer.println(rgId + "\t" + "/work/" + so.getFile().getName() + "\t" + depth + "\t" + readLength); + writer.println(rgId + "\t" + so.getFile().getPath() + "\t" + depth + "\t" + readLength); } catch (IOException e) { @@ -232,12 +232,9 @@ else if (!svVcf.exists()) List paragraphArgs = new ArrayList<>(); paragraphArgs.add("/opt/paragraph/bin/multigrmpy.py"); - dockerWrapper.ensureLocalCopy(so.getFile(), ctx.getWorkingDirectory(), ctx.getFileManager()); - dockerWrapper.ensureLocalCopy(SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()), ctx.getWorkingDirectory(), ctx.getFileManager()); - File paragraphOutDir = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile())); paragraphArgs.add("-o"); - paragraphArgs.add("/work/" + paragraphOutDir.getName()); + paragraphArgs.add(paragraphOutDir.getPath()); File scratchDir = new File(ctx.getOutputDir(), "pgScratch"); if (scratchDir.exists()) @@ -253,17 +250,15 @@ else if 
(!svVcf.exists()) } paragraphArgs.add("--scratch-dir"); - paragraphArgs.add("/work/" + scratchDir.getName()); + paragraphArgs.add(scratchDir.getPath()); ctx.getFileManager().addIntermediateFile(scratchDir); paragraphArgs.add("-i"); - dockerWrapper.ensureLocalCopy(svVcf, ctx.getWorkingDirectory(), ctx.getFileManager()); - dockerWrapper.ensureLocalCopy(new File(svVcf.getPath() + ".tbi"), ctx.getWorkingDirectory(), ctx.getFileManager()); - paragraphArgs.add("/work/" + svVcf.getName()); + paragraphArgs.add(svVcf.getPath()); paragraphArgs.add("-m"); - paragraphArgs.add("/work/" + coverageFile.getName()); + paragraphArgs.add(coverageFile.getPath()); if (ctx.getParams().optBoolean("verbose", false)) { @@ -272,9 +267,7 @@ else if (!svVcf.exists()) paragraphArgs.add("-r"); File genomeFasta = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile(); - dockerWrapper.ensureLocalCopy(genomeFasta, ctx.getWorkingDirectory(), ctx.getFileManager()); - dockerWrapper.ensureLocalCopy(new File(genomeFasta.getPath() + ".fai"), ctx.getWorkingDirectory(), ctx.getFileManager()); - paragraphArgs.add("/work/" + genomeFasta.getName()); + paragraphArgs.add(genomeFasta.getPath()); if (ctx.getParams().optBoolean("retrieveReferenceSeq", false)) { diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index 424147d47..f602f23fc 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -27,7 +27,7 @@ import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider; import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; -import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; +import org.labkey.api.sequenceanalysis.run.DockerWrapper; import org.labkey.api.util.PageFlowUtil; import org.labkey.api.writer.PrintWriters; @@ 
-309,11 +309,8 @@ public void doNimbleAlign(File bam, PipelineStepOutput output, Readset rs, Strin private File prepareReference(File genomeCsv, File genomeFasta, NimbleGenome genome, PipelineStepOutput output) throws PipelineJobException { - genomeCsv = ensureLocalCopy(genomeCsv, output); - genomeFasta = ensureLocalCopy(genomeFasta, output); - File nimbleJson = new File(getPipelineCtx().getWorkingDirectory(), genome.genomeId + ".json"); - runUsingDocker(Arrays.asList("python3", "-m", "nimble", "generate", "--opt-file", "/work/" + genomeFasta.getName(), "--file", "/work/" + genomeCsv.getName(), "--output_path", "/work/" + nimbleJson.getName()), output, "generate-" + genome.genomeId); + runUsingDocker(Arrays.asList("python3", "-m", "nimble", "generate", "--opt-file", genomeFasta.getPath(), "--file", genomeCsv.getPath(), "--output_path", nimbleJson.getPath()), output, "generate-" + genome.genomeId); if (!nimbleJson.exists()) { File doneFile = getNimbleDoneFile(getPipelineCtx().getWorkingDirectory(), "generate-" + genome.genomeId); @@ -410,20 +407,6 @@ private Map doAlignment(List genomes, List resultMap = new HashMap<>(); - File localBam = ensureLocalCopy(bam, output); - ensureLocalCopy(SequenceAnalysisService.get().getExpectedBamOrCramIndex(bam), output); - - List localRefJsons = refJsons.stream().map(refJson -> { - try - { - return ensureLocalCopy(refJson, output); - } - catch (PipelineJobException e) - { - throw new RuntimeException(e); - } - }).collect(Collectors.toList()); - List alignArgs = new ArrayList<>(); alignArgs.add("python3"); alignArgs.add("-m"); @@ -447,13 +430,13 @@ private Map doAlignment(List genomes, List "/work/" + x.getName()).collect(Collectors.joining(","))); + alignArgs.add(refJsons.stream().map(File::getPath).collect(Collectors.joining(","))); alignArgs.add("--output"); - alignArgs.add("/work/" + alignmentTsvBase.getName()); + alignArgs.add(alignmentTsvBase.getPath()); alignArgs.add("--input"); - alignArgs.add("/work/" + localBam.getName()); 
+ alignArgs.add(bam.getPath()); boolean dockerRan = runUsingDocker(alignArgs, output, "align.all"); for (NimbleGenome genome : genomes) @@ -492,7 +475,7 @@ public static File runNimbleReport(File alignResultsGz, int genomeId, PipelineSt reportArgs.add("report"); reportArgs.add("-i"); - reportArgs.add("/work/" + alignResultsGz.getName()); + reportArgs.add(alignResultsGz.getPath()); File reportResultsGz = new File(ctx.getWorkingDirectory(), "reportResults." + genomeId + ".txt"); if (reportResultsGz.exists()) @@ -501,7 +484,7 @@ public static File runNimbleReport(File alignResultsGz, int genomeId, PipelineSt } reportArgs.add("-o"); - reportArgs.add("/work/" + reportResultsGz.getName()); + reportArgs.add(reportResultsGz.getPath()); runUsingDocker(reportArgs, output, null, ctx); @@ -520,7 +503,7 @@ public static File runNimbleReport(File alignResultsGz, int genomeId, PipelineSt plotArgs.add("plot"); plotArgs.add("--input_file"); - plotArgs.add("/work/" + alignResultsGz.getName()); + plotArgs.add(alignResultsGz.getPath()); File plotResultsHtml = getReportHtmlFileFromResults(reportResultsGz); if (plotResultsHtml.exists()) @@ -529,7 +512,7 @@ public static File runNimbleReport(File alignResultsGz, int genomeId, PipelineSt } plotArgs.add("--output_file"); - plotArgs.add("/work/" + plotResultsHtml.getName()); + plotArgs.add(plotResultsHtml.getPath()); runUsingDocker(plotArgs, output, null, ctx); @@ -565,10 +548,9 @@ private boolean runUsingDocker(List nimbleArgs, PipelineStepOutput outpu private static boolean runUsingDocker(List nimbleArgs, PipelineStepOutput output, @Nullable String resumeString, PipelineContext ctx) throws PipelineJobException { - File localBashScript = new File(ctx.getWorkingDirectory(), "docker.sh"); - File dockerBashScript = new File(ctx.getWorkingDirectory(), "dockerRun.sh"); - output.addIntermediateFile(localBashScript); - output.addIntermediateFile(dockerBashScript); + DockerWrapper wrapper = new DockerWrapper(DOCKER_CONTAINER_NAME, ctx.getLogger(), 
ctx); + wrapper.setWorkingDir(ctx.getWorkingDirectory()); + wrapper.setEntryPoint("/bin/bash"); // Create temp folder: File tmpDir = new File(ctx.getWorkingDirectory(), "tmpDir"); @@ -585,51 +567,9 @@ private static boolean runUsingDocker(List nimbleArgs, PipelineStepOutpu } } output.addIntermediateFile(tmpDir); + wrapper.setTmpDir(tmpDir); - try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript);PrintWriter dockerWriter = PrintWriters.getPrintWriter(dockerBashScript)) - { - writer.println("#!/bin/bash"); - writer.println("set -x"); - writer.println("WD=`pwd`"); - writer.println("HOME=`echo ~/`"); - - writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("$DOCKER pull " + DOCKER_CONTAINER_NAME); - writer.println("$DOCKER run --rm=true \\"); - - Integer maxRam = SequencePipelineService.get().getMaxRam(); - if (maxRam != null) - { - //int swap = 4*maxRam; - writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM=" + maxRam + " \\"); - writer.println("\t--memory='" + maxRam + "g' \\"); - } - - ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); - writer.println("\t-v \"${WD}:/work\" \\"); - writer.println("\t-v \"${HOME}:/homeDir\" \\"); - writer.println("\t-e RUST_BACKTRACE=1 \\"); - writer.println("\t-e TMPDIR=/work/tmpDir \\"); - writer.println("\t--entrypoint /bin/bash \\"); - writer.println("\t-w /work \\"); - writer.println("\t" + DOCKER_CONTAINER_NAME + " \\"); - writer.println("\t/work/" + dockerBashScript.getName()); - writer.println("EXIT_CODE=$?"); - writer.println("echo 'Docker run exit code: '$EXIT_CODE"); - writer.println("exit $EXIT_CODE"); - - dockerWriter.println("#!/bin/bash"); - dockerWriter.println("set -x"); - - dockerWriter.println(StringUtils.join(nimbleArgs, " ")); - dockerWriter.println("EXIT_CODE=$?"); - dockerWriter.println("echo 'Exit code: '$?"); - dockerWriter.println("exit $EXIT_CODE"); - } - catch (IOException e) - { - throw new PipelineJobException(e); - } + 
wrapper.addToEnvironment("RUST_BACKTRACE", "1"); File doneFile = null; if (resumeString != null) @@ -648,9 +588,7 @@ private static boolean runUsingDocker(List nimbleArgs, PipelineStepOutpu } } - SimpleScriptWrapper rWrapper = new SimpleScriptWrapper(ctx.getLogger()); - rWrapper.setWorkingDir(ctx.getWorkingDirectory()); - rWrapper.execute(Arrays.asList("/bin/bash", localBashScript.getName())); + wrapper.executeWithDocker(nimbleArgs, ctx.getWorkingDirectory(), output); if (doneFile != null) { @@ -667,38 +605,6 @@ private static boolean runUsingDocker(List nimbleArgs, PipelineStepOutpu return true; } - private File ensureLocalCopy(File input, PipelineStepOutput output) throws PipelineJobException - { - return ensureLocalCopy(input, output, getPipelineCtx()); - } - - public static File ensureLocalCopy(File input, PipelineStepOutput output, PipelineContext ctx) throws PipelineJobException - { - try - { - if (ctx.getWorkingDirectory().equals(input.getParentFile())) - { - return input; - } - - File local = new File(ctx.getWorkingDirectory(), input.getName()); - if (!local.exists()) - { - ctx.getLogger().debug("Copying file locally: " + input.getPath()); - FileUtils.copyFile(input, local); - } - - output.addIntermediateFile(local); - - return local; - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - } - - private static class NimbleGenome { private final int genomeId; @@ -712,7 +618,7 @@ public NimbleGenome(JSONArray arr, int maxHitsToReport) throws PipelineJobExcept { if (arr.length() < 3) { - throw new PipelineJobException("Improper genome: " + arr.toString()); + throw new PipelineJobException("Improper genome: " + arr); } genomeId = arr.getInt(0); @@ -757,7 +663,7 @@ public Integer getNumMismatches() private String getVersion(PipelineStepOutput output) throws PipelineJobException { List nimbleArgs = new ArrayList<>(); - nimbleArgs.add("/bin/bash -c 'python3 -m nimble -v' > /work/nimbleVersion.txt"); + nimbleArgs.add("/bin/bash -c 'python3 -m 
nimble -v' > nimbleVersion.txt"); runUsingDocker(nimbleArgs, output, null); @@ -767,7 +673,7 @@ private String getVersion(PipelineStepOutput output) throws PipelineJobException throw new PipelineJobException("Unable to find file: " + outFile.getPath()); } - String ret = null; + String ret; try { ret = StringUtils.trimToNull(Files.readString(outFile.toPath())); From 06d33582cd652028f5271fc2a1e61885b685d26f Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 30 Nov 2024 17:24:45 -0800 Subject: [PATCH 17/53] Refactor nimble to use DockerWrapper --- .../labkey/sequenceanalysis/run/alignment/ParagraphStep.java | 2 +- .../org/labkey/singlecell/run/RepeatNimbleReportHandler.java | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index 15700e13e..242c3fabe 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -130,7 +130,7 @@ else if (!svVcf.exists()) SelectVariantsWrapper svw = new SelectVariantsWrapper(ctx.getLogger()); List selectArgs = new ArrayList<>(); selectArgs.add("-select"); - selectArgs.add("SVTYPE != 'BND' && POS > 150 && !(vc.hasAttribute('SVTYPE') && vc.getAttribute('SVTYPE') == 'INS' && vc.hasSymbolicAlleles() && !vc.hasAttribute('SEQ'))"); + selectArgs.add("SVTYPE != 'BND' && SVTYPE != 'DUP' && POS > 150 && !(vc.hasAttribute('SVTYPE') && vc.getAttribute('SVTYPE') == 'INS' && vc.hasSymbolicAlleles() && !vc.hasAttribute('SEQ'))"); selectArgs.add("--exclude-filtered"); selectArgs.add("--exclude-filtered"); selectArgs.add("--sites-only-vcf-output"); diff --git a/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java index 9228cd71e..c0e40daee 100644 --- 
a/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java +++ b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java @@ -115,8 +115,7 @@ public void processFilesRemote(List inputFiles, JobContext c } // This will update these files in-place: - File alignmentFileLocal = NimbleHelper.ensureLocalCopy(alignmentFile, output, ctx); - File reportFile = NimbleHelper.runNimbleReport(alignmentFileLocal, so.getLibrary_id(), output, ctx); + File reportFile = NimbleHelper.runNimbleReport(alignmentFile, so.getLibrary_id(), output, ctx); if (!reportFile.exists()) { throw new PipelineJobException("Unable to find file: " + reportFile.getPath()); From 11401ed6456530cb2325e431c23a3f9e0d95ad9c Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 1 Dec 2024 07:36:27 -0800 Subject: [PATCH 18/53] Add entrypoint to ParagraphStep --- .../run/alignment/ParagraphStep.java | 2 ++ .../pipeline/SlurmExecutionEngine.java | 22 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index 242c3fabe..9311366d6 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -229,6 +229,8 @@ else if (!svVcf.exists()) DockerWrapper dockerWrapper = new DockerWrapper("ghcr.io/bimberlabinternal/paragraph:latest", ctx.getLogger(), ctx); dockerWrapper.setTmpDir(new File(SequencePipelineService.get().getJavaTempDir())); + dockerWrapper.setEntryPoint("/bin/bash"); + List paragraphArgs = new ArrayList<>(); paragraphArgs.add("/opt/paragraph/bin/multigrmpy.py"); diff --git a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java index bbc465ff9..2c528a2d1 100644 --- 
a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java +++ b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java @@ -1,7 +1,6 @@ package org.labkey.cluster.pipeline; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.math.NumberUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.NotNull; @@ -14,7 +13,6 @@ import org.labkey.api.data.ContainerManager; import org.labkey.api.pipeline.PipelineJob; import org.labkey.api.pipeline.PipelineJobException; -import org.labkey.api.pipeline.PipelineJobService; import org.labkey.api.pipeline.PipelineService; import org.labkey.api.pipeline.PipelineStatusFile; import org.labkey.api.util.FileUtil; @@ -312,8 +310,8 @@ else if (headerFound) String maxRSS = StringUtils.trimToNull(tokens[maxRssIdx]); if (maxRSS != null) { - long bytes = FileSizeFormatter.convertStringRepresentationToBytes(maxRSS); - long requestInBytes = FileSizeFormatter.convertStringRepresentationToBytes(getConfig().getRequestMemory() + "G"); //request is always GB + double bytes = FileSizeFormatter.convertStringRepresentationToBytes(maxRSS); + double requestInBytes = FileSizeFormatter.convertStringRepresentationToBytes(getConfig().getRequestMemory() + "G"); //request is always GB if (bytes > requestInBytes) { info = "Job exceeded memory, max was: " + FileSizeFormatter.convertBytesToUnit(bytes, 'G') + "G, requested memory was: " + getConfig().getRequestMemory() + "G"; @@ -780,13 +778,13 @@ private Pair getStatusFromQueue(ClusterJob job) // Based on: https://stackoverflow.com/questions/3758606/how-can-i-convert-byte-size-into-a-human-readable-format-in-java private static class FileSizeFormatter { - public static long convertStringRepresentationToBytes(final String value) + public static double convertStringRepresentationToBytes(final String value) { try { char unit = value.toUpperCase().charAt(value.length() - 1); long sizeFactor = 
getSizeFactor(unit); - long size = Long.parseLong(value.substring(0, value.length() - 1)); + double size = Double.parseDouble(value.substring(0, value.length() - 1)); return size * sizeFactor; } @@ -796,11 +794,11 @@ public static long convertStringRepresentationToBytes(final String value) } } - public static long convertBytesToUnit(final long bytes, final char unit) + public static double convertBytesToUnit(final double bytes, final char unit) { long sizeFactor = getSizeFactor(unit); - return bytes / sizeFactor; + return bytes / (double)sizeFactor; } private static long getSizeFactor(char unit) @@ -826,11 +824,11 @@ public static class TestCase @Test public void testFileSizeFormatter() { - long bytes = FileSizeFormatter.convertStringRepresentationToBytes("1362624K"); - Assert.assertEquals("Incorrect byte value", 1395326976, bytes); + double bytes = FileSizeFormatter.convertStringRepresentationToBytes("1362624K"); + Assert.assertEquals("Incorrect byte value", 1395326976.0, bytes, 0.0); - long val2 = FileSizeFormatter.convertBytesToUnit(bytes, 'K'); - Assert.assertEquals("Incorrect string value", 1362624, val2); + double val2 = FileSizeFormatter.convertBytesToUnit(bytes, 'K'); + Assert.assertEquals("Incorrect string value", 1362624.0, val2, 0.0); } } } From d01899d71b84330b3ffbf955f7201a8c0e8996ba Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 1 Dec 2024 16:15:07 -0800 Subject: [PATCH 19/53] Append to log when logging slurm memory --- .../src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java index 2c528a2d1..ab610d413 100644 --- a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java +++ b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java @@ -27,6 +27,7 @@ import java.io.FileWriter; import java.io.IOException; import 
java.io.PrintWriter; +import java.nio.file.StandardOpenOption; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -319,7 +320,7 @@ else if (headerFound) PipelineStatusFile sf = PipelineService.get().getStatusFile(job.getJobId()); if (sf != null) { - try (PrintWriter writer = PrintWriters.getPrintWriter(new File(sf.getFilePath()))) + try (PrintWriter writer = PrintWriters.getPrintWriter(new File(sf.getFilePath()), StandardOpenOption.APPEND)) { writer.println(info + ". Raw slurm value: " + maxRSS); } From 9a4feeb7af531827ea3598a5c25afbd55384d297 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 1 Dec 2024 18:22:14 -0800 Subject: [PATCH 20/53] Pass tmpDir to nimble using environment --- singlecell/src/org/labkey/singlecell/run/NimbleHelper.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index f602f23fc..c1983a5d0 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -567,7 +567,8 @@ private static boolean runUsingDocker(List nimbleArgs, PipelineStepOutpu } } output.addIntermediateFile(tmpDir); - wrapper.setTmpDir(tmpDir); + wrapper.setTmpDir(null); + wrapper.addToEnvironment("TMPDIR", tmpDir.getPath()); wrapper.addToEnvironment("RUST_BACKTRACE", "1"); From 44aa8ba8b63a89ecfac5102adca14dd002e3f9c4 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 1 Dec 2024 20:58:15 -0800 Subject: [PATCH 21/53] Parse request memory and mem used for slurm jobs --- .../pipeline/SlurmExecutionEngine.java | 44 ++++++++++++++----- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java index ab610d413..017ce8e46 100644 --- a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java +++ 
b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java @@ -247,6 +247,8 @@ protected Pair getStatusForJob(ClusterJob job, Container c) int stateIdx = -1; int hostnameIdx = -1; int maxRssIdx = -1; + int reqMemIdx = -1; + String reqMem = null; for (String line : ret) { line = StringUtils.trimToNull(line); @@ -263,6 +265,7 @@ protected Pair getStatusForJob(ClusterJob job, Container c) stateIdx = header.indexOf("STATE"); hostnameIdx = header.indexOf("NODELIST"); maxRssIdx = header.indexOf("MAXRSS"); + reqMemIdx = header.indexOf("REQMEM"); if (stateIdx == -1) { @@ -303,6 +306,16 @@ else if (headerFound) } } + if (reqMemIdx > -1 && reqMemIdx < tokens.length) + { + String val = StringUtils.trimToNull(tokens[reqMemIdx]); + if (val != null) + { + reqMem = val; + } + + } + // NOTE: if the line has blank ending columns, trimmed lines might lack that value if ((job.getClusterId() + ".0").equals(id) && maxRssIdx > -1 && maxRssIdx < tokens.length) { @@ -312,21 +325,28 @@ else if (headerFound) if (maxRSS != null) { double bytes = FileSizeFormatter.convertStringRepresentationToBytes(maxRSS); - double requestInBytes = FileSizeFormatter.convertStringRepresentationToBytes(getConfig().getRequestMemory() + "G"); //request is always GB - if (bytes > requestInBytes) + if (reqMem == null) { - info = "Job exceeded memory, max was: " + FileSizeFormatter.convertBytesToUnit(bytes, 'G') + "G, requested memory was: " + getConfig().getRequestMemory() + "G"; - - PipelineStatusFile sf = PipelineService.get().getStatusFile(job.getJobId()); - if (sf != null) + _log.warn("Unable to find ReqMem for slurm job: " + job.getClusterId()); + } + else + { + double requestInBytes = FileSizeFormatter.convertStringRepresentationToBytes(reqMem); + if (bytes > requestInBytes) { - try (PrintWriter writer = PrintWriters.getPrintWriter(new File(sf.getFilePath()), StandardOpenOption.APPEND)) - { - writer.println(info + ". 
Raw slurm value: " + maxRSS); - } - catch (FileNotFoundException e) + info = "Job exceeded memory, max was: " + FileSizeFormatter.convertBytesToUnit(bytes, 'G') + "G, requested memory was: " + FileSizeFormatter.convertBytesToUnit(requestInBytes, 'G'); + + PipelineStatusFile sf = PipelineService.get().getStatusFile(job.getJobId()); + if (sf != null) { - _log.error("Unable to find log file for job, " + job.getJobId() + ": " + sf.getFilePath()); + try (PrintWriter writer = PrintWriters.getPrintWriter(new File(sf.getFilePath()), StandardOpenOption.APPEND)) + { + writer.println(info + ". Raw slurm value: " + maxRSS); + } + catch (FileNotFoundException e) + { + _log.error("Unable to find log file for job, " + job.getJobId() + ": " + sf.getFilePath()); + } } } } From c0af553cb8bb20c971bcd2828fa1fecf1acf5ac2 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 1 Dec 2024 21:00:52 -0800 Subject: [PATCH 22/53] Option to run docker prune before jobs --- .../api/sequenceanalysis/run/DockerWrapper.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index 316c9ca9e..0dbee4fd3 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -21,6 +21,7 @@ public class DockerWrapper extends AbstractCommandWrapper private final PipelineContext _ctx; private File _tmpDir = null; private String _entryPoint = null; + private boolean _runPrune = true; public DockerWrapper(String containerName, Logger log, PipelineContext ctx) { @@ -41,6 +42,11 @@ public void setEntryPoint(String entryPoint) _entryPoint = entryPoint; } + public void setRunPrune(boolean runPrune) + { + _runPrune = runPrune; + } + public void executeWithDocker(List containerArgs, File workDir, PipelineOutputTracker tracker) throws 
PipelineJobException { File localBashScript = new File(workDir, "docker.sh"); @@ -56,6 +62,11 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline writer.println("WD=`pwd`"); writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); + if (_runPrune) + { + writer.println("$DOCKER image prune -f"); + } + writer.println("$DOCKER pull " + _containerName); writer.println("$DOCKER run --rm=true \\"); From d5691894ec65489f51e1e5b9ef1a4b0d7b59ac7a Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 2 Dec 2024 13:16:48 -0800 Subject: [PATCH 23/53] Switch nimble to pass align-specific tempDir on the command line, rather than use environment --- .../labkey/singlecell/run/NimbleHelper.java | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index c1983a5d0..bc809705e 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -438,6 +438,25 @@ private Map doAlignment(List genomes, List nimbleArgs, PipelineStepOutpu wrapper.setWorkingDir(ctx.getWorkingDirectory()); wrapper.setEntryPoint("/bin/bash"); - // Create temp folder: - File tmpDir = new File(ctx.getWorkingDirectory(), "tmpDir"); - if (tmpDir.exists()) - { - try - { - FileUtils.deleteDirectory(tmpDir); - Files.createDirectory(tmpDir.toPath()); - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - } - output.addIntermediateFile(tmpDir); wrapper.setTmpDir(null); - wrapper.addToEnvironment("TMPDIR", tmpDir.getPath()); wrapper.addToEnvironment("RUST_BACKTRACE", "1"); From 8157f85e51e38d80246924604a35c2019400a7ae Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 2 Dec 2024 13:41:49 -0800 Subject: [PATCH 24/53] Add --group-add keep-groups to docker --- .../org/labkey/api/sequenceanalysis/run/DockerWrapper.java | 3 ++- 1 file 
changed, 2 insertions(+), 1 deletion(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index 0dbee4fd3..5b2e35f5f 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -62,13 +62,14 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline writer.println("WD=`pwd`"); writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); + writer.println("$DOCKER pull " + _containerName); if (_runPrune) { writer.println("$DOCKER image prune -f"); } - writer.println("$DOCKER pull " + _containerName); writer.println("$DOCKER run --rm=true \\"); + writer.println("\t--group-add keep-groups \\"); // NOTE: getDockerVolumes() should be refactored to remove the -v and this logic should be updated accordingly: File homeDir = new File(System.getProperty("user.home")); From 753356c869931ac0685cff0d40abab96b8206647 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 2 Dec 2024 21:40:01 -0800 Subject: [PATCH 25/53] Convert AbstractSingleCellPipelineStep to use DockerWrapper --- .../AbstractSingleCellPipelineStep.java | 70 ++++--------------- 1 file changed, 12 insertions(+), 58 deletions(-) diff --git a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java index 4c0638d92..e06cdd4ca 100644 --- a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java +++ b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java @@ -17,7 +17,7 @@ import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import 
org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; -import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; +import org.labkey.api.sequenceanalysis.run.DockerWrapper; import org.labkey.api.util.FileUtil; import org.labkey.api.writer.PrintWriters; @@ -301,6 +301,8 @@ protected static SeuratToolParameter getSeuratThreadsParam() public static void executeR(SequenceOutputHandler.JobContext ctx, String dockerContainerName, String outputPrefix, List lines, @Nullable Integer seuratThreads, @Nullable String dockerHomeDir) throws PipelineJobException { + DockerWrapper wrapper = new DockerWrapper(dockerContainerName, ctx.getLogger(), ctx); + File localRScript = new File(ctx.getOutputDir(), FileUtil.makeLegalName(outputPrefix + ".R").replaceAll(" ", "_")); try (PrintWriter writer = PrintWriters.getPrintWriter(localRScript)) { @@ -311,72 +313,24 @@ public static void executeR(SequenceOutputHandler.JobContext ctx, String dockerC throw new PipelineJobException(e); } - File localBashScript = new File(ctx.getOutputDir(), "dockerWrapper.sh"); - try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript)) + if (seuratThreads != null) { - writer.println("#!/bin/bash"); - writer.println("set -x"); - writer.println("WD=`pwd`"); - writer.println("HOME=`echo ~/`"); - - writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("$DOCKER pull " + dockerContainerName); - writer.println("$DOCKER run --rm=true \\"); - Integer maxThreads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); - if (maxThreads != null) - { - writer.println("\t-e SEQUENCEANALYSIS_MAX_THREADS=" + maxThreads + " \\"); - } - - if (seuratThreads != null) + if (maxThreads != null && maxThreads < seuratThreads) { - if (maxThreads != null && maxThreads < seuratThreads) - { - seuratThreads = maxThreads; - } - - writer.println("\t-e SEURAT_MAX_THREADS=" + seuratThreads + " \\"); + seuratThreads = maxThreads; } - Integer maxRam = 
SequencePipelineService.get().getMaxRam(); - if (maxRam != null) - { - //int swap = 4*maxRam; - writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM=" + maxRam + " \\"); - writer.println("\t--memory='" + maxRam + "g' \\"); - } - - File tmpDir = new File(SequencePipelineService.get().getJavaTempDir()); - writer.println("\t-v \"${WD}:/work\" \\"); - writer.println("\t-v \"" + tmpDir.getPath() + ":/tmp\" \\"); - ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); - writer.println("\t-v \"${HOME}:/homeDir\" \\"); - writer.println("\t-e TMPDIR=/tmp \\"); - if (dockerHomeDir != null) - { - writer.println("\t-e HOME=" + dockerHomeDir + " \\"); - } - writer.println("\t-w /work \\"); - //NOTE: this seems to disrupt packages installed into home - //writer.println("\t-e HOME=/homeDir \\"); - writer.println("\t" + dockerContainerName + " \\"); - writer.println("\tRscript --vanilla '" + localRScript.getName() + "'"); - writer.println("EXIT_CODE=$?"); - writer.println("echo 'Bash script complete: '$EXIT_CODE"); - writer.println("exit $EXIT_CODE"); - } - catch (IOException e) - { - throw new PipelineJobException(e); + wrapper.addToEnvironment("SEURAT_MAX_THREADS", seuratThreads.toString()); } - SimpleScriptWrapper rWrapper = new SimpleScriptWrapper(ctx.getLogger()); - rWrapper.setWorkingDir(ctx.getOutputDir()); - rWrapper.execute(Arrays.asList("/bin/bash", localBashScript.getName())); + File tmpDir = new File(SequencePipelineService.get().getJavaTempDir()); + wrapper.setTmpDir(tmpDir); + + wrapper.setWorkingDir(ctx.getOutputDir()); + wrapper.executeWithDocker(Arrays.asList("Rscript", "--vanilla", "'" + localRScript.getName() + "'"), ctx.getWorkingDirectory(), ctx.getFileManager()); localRScript.delete(); - localBashScript.delete(); } public String getDockerHomeDir() From edd587a07687beb48a4ca420a5e2d9a8ac03c006 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 3 Dec 2024 11:13:34 -0800 Subject: [PATCH 26/53] Significant refactor of docker in pipeline jobs to migrate 
everything to DockerWrapper and improve handling of input file locations --- .../pipeline/JobResourceSettings.java | 7 + .../pipeline/SequencePipelineService.java | 6 + .../sequenceanalysis/run/DockerWrapper.java | 83 ++++++---- .../SequencePipelineServiceImpl.java | 22 ++- .../analysis/GLNexusHandler.java | 129 +++++---------- .../pipeline/SequenceJob.java | 15 ++ .../run/analysis/DeepVariantAnalysis.java | 101 ++---------- .../run/analysis/NextCladeHandler.java | 69 +------- .../run/analysis/PangolinHandler.java | 67 +------- .../AbstractSingleCellPipelineStep.java | 20 ++- singlecell/resources/chunks/AppendMetadata.R | 9 +- singlecell/resources/chunks/AppendNimble.R | 9 +- singlecell/resources/chunks/AppendTcr.R | 9 +- singlecell/resources/chunks/AvgExpression.R | 9 +- .../chunks/ClassifyTNKByExpression.R | 9 +- singlecell/resources/chunks/RunConga.R | 9 +- singlecell/resources/chunks/StudyMetadata.R | 9 +- .../chunks/SummarizeTCellActivation.R | 9 +- singlecell/resources/chunks/TrainCelltypist.R | 2 +- .../resources/chunks/UpdateSeuratPrototype.R | 9 +- .../singlecell/CellHashingServiceImpl.java | 152 +++--------------- .../analysis/AbstractSingleCellHandler.java | 2 +- .../pipeline/singlecell/PredictScTour.java | 32 ++-- .../singlecell/RunCelltypistCustomModel.java | 33 ++-- .../pipeline/singlecell/TrainCelltypist.java | 2 +- 25 files changed, 281 insertions(+), 542 deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/JobResourceSettings.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/JobResourceSettings.java index 2f7a9f122..a83b2aca3 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/JobResourceSettings.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/JobResourceSettings.java @@ -1,7 +1,9 @@ package org.labkey.api.sequenceanalysis.pipeline; +import org.jetbrains.annotations.Nullable; import org.labkey.api.data.Container; +import 
java.io.File; import java.util.Collection; import java.util.List; @@ -15,4 +17,9 @@ public interface JobResourceSettings List getParams(); Collection getDockerVolumes(Container c); + + default @Nullable File inferDockerVolume(File input) + { + return null; + } } diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java index 64097cd84..dd19bba68 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java @@ -102,6 +102,12 @@ static public void setInstance(SequencePipelineService instance) abstract public Collection getDockerVolumes(Container c); + /** + * The purpose of this method is to assist with translating from raw filepath to the desired volume to mount in a docker container. + * This is mostly relevant for situations where the NFS root should be mounted, rather than a child folder. 
+ */ + abstract public @Nullable File inferDockerVolume(File input); + abstract public List getSequenceJobInputFiles(PipelineJob job); /** diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index 5b2e35f5f..5dcbe3ac6 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -1,8 +1,8 @@ package org.labkey.api.sequenceanalysis.run; -import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Logger; +import org.jetbrains.annotations.Nullable; import org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker; @@ -13,7 +13,12 @@ import java.io.IOException; import java.io.PrintWriter; import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; public class DockerWrapper extends AbstractCommandWrapper { @@ -22,6 +27,7 @@ public class DockerWrapper extends AbstractCommandWrapper private File _tmpDir = null; private String _entryPoint = null; private boolean _runPrune = true; + private String _alternateUserHome = null; public DockerWrapper(String containerName, Logger log, PipelineContext ctx) { @@ -32,6 +38,11 @@ public DockerWrapper(String containerName, Logger log, PipelineContext ctx) _environment.clear(); } + public void setAlternateUserHome(String alternateUserHome) + { + _alternateUserHome = alternateUserHome; + } + public void setTmpDir(File tmpDir) { _tmpDir = tmpDir; @@ -48,6 +59,11 @@ public void setRunPrune(boolean runPrune) } public void executeWithDocker(List containerArgs, File workDir, 
PipelineOutputTracker tracker) throws PipelineJobException + { + executeWithDocker(containerArgs, workDir, tracker, null); + } + + public void executeWithDocker(List containerArgs, File workDir, PipelineOutputTracker tracker, @Nullable Collection inputFiles) throws PipelineJobException { File localBashScript = new File(workDir, "docker.sh"); File dockerBashScript = new File(workDir, "dockerRun.sh"); @@ -75,25 +91,33 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline File homeDir = new File(System.getProperty("user.home")); if (homeDir.exists()) { - final String searchString = "-v '" + homeDir.getPath() + "'"; - if (_ctx.getDockerVolumes().stream().noneMatch(searchString::startsWith)) + if (_ctx.getDockerVolumes().stream().noneMatch(homeDir.getPath()::startsWith)) { - writer.println("\t-v \"" + homeDir.getPath() + ":/homeDir\" \\"); + writer.println("\t-v '" + homeDir.getPath() + "':'" + homeDir.getPath() + "' \\"); } else { - _ctx.getLogger().debug("homeDir already present in docker volumes, omitting"); + _ctx.getLogger().debug("homeDir already present in docker volumes, will not re-add"); } _environment.put("USER_HOME", homeDir.getPath()); } - _ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); + if (_alternateUserHome != null) + { + _environment.put("HOME", _alternateUserHome); + } + + _ctx.getDockerVolumes().forEach(v -> writer.println("\t-v '" + v + "':'" + v + "'\\")); + if (inputFiles != null) + { + inspectInputFiles(inputFiles).forEach(v -> writer.println("\t-v '" + v + "':'" + v + "'\\")); + } + if (_tmpDir != null) { // NOTE: getDockerVolumes() should be refactored to remove the -v and this logic should be updated accordingly: - final String searchString = "-v '" + _tmpDir.getPath() + "'"; - if (_ctx.getDockerVolumes().stream().noneMatch(searchString::startsWith)) + if (_ctx.getDockerVolumes().stream().noneMatch(_tmpDir.getPath()::startsWith)) { writer.println("\t-v \"" + _tmpDir.getPath() + ":/tmp\" \\"); } @@ 
-101,6 +125,8 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline { _ctx.getLogger().debug("tmpDir already present in docker volumes, omitting"); } + + addToEnvironment("TMPDIR", _tmpDir.getPath()); } if (_entryPoint != null) @@ -109,6 +135,8 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline } writer.println("\t-w " + workDir.getPath() + " \\"); + addToEnvironment("WORK_DIR", workDir.getPath()); + Integer maxRam = SequencePipelineService.get().getMaxRam(); if (maxRam != null) { @@ -121,7 +149,7 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline writer.println("\t-e " + key + "=" + _environment.get(key) + " \\"); } writer.println("\t" + _containerName + " \\"); - writer.println("\t" + workDir.getPath() + "/" + dockerBashScript.getName()); + writer.println("\t/bin/bash " + dockerBashScript.getPath()); writer.println("DOCKER_EXIT_CODE=$?"); writer.println("echo 'Docker run exit code: '$DOCKER_EXIT_CODE"); writer.println("exit $DOCKER_EXIT_CODE"); @@ -141,29 +169,30 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline execute(Arrays.asList("/bin/bash", localBashScript.getPath())); } - public File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTracker output) throws PipelineJobException + private Collection inspectInputFiles(Collection inputFiles) { - try + Set toAdd = inputFiles.stream().map(f -> f.isDirectory() ? 
f : f.getParentFile()).filter(x -> _ctx.getDockerVolumes().stream().noneMatch(x.getPath()::startsWith)).collect(Collectors.toSet()); + if (!toAdd.isEmpty()) { - if (workingDirectory.equals(input.getParentFile())) - { - return input; - } + Set paths = new HashSet<>(); + toAdd.forEach(x -> { + _ctx.getLogger().debug("Adding volume for path: " + x.getPath()); - File local = new File(workingDirectory, input.getName()); - if (!local.exists()) - { - getLogger().debug("Copying file locally: " + input.getPath()); - FileUtils.copyFile(input, local); - } + File converted = SequencePipelineService.get().inferDockerVolume(x); + if (!x.equals(converted)) + { + _ctx.getLogger().debug("added as: " + converted.getPath()); + } - output.addIntermediateFile(local); + if (_ctx.getDockerVolumes().stream().noneMatch(converted.getPath()::startsWith)) + { + paths.add(converted); + } + }); - return local; - } - catch (IOException e) - { - throw new PipelineJobException(e); + return paths; } + + return Collections.emptySet(); } } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java index 9716cac61..d1b808338 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java @@ -472,16 +472,28 @@ public Collection getDockerVolumes(Container c) { if (settings.isAvailable(c)) { - for (String volume : settings.getDockerVolumes(c)) - { - volumeLines.add("-v '" + volume + "':'" + volume + "'"); - } + return Collections.unmodifiableCollection(settings.getDockerVolumes(c)); } } return volumeLines; } + @Override + public @Nullable File inferDockerVolume(File input) + { + for (JobResourceSettings settings : SequencePipelineServiceImpl.get().getResourceSettings()) + { + File ret = settings.inferDockerVolume(input); + if (ret != null) + { + return ret; + } + } + + 
return null; + } + @Override public List getSequenceJobInputFiles(PipelineJob job) { @@ -570,7 +582,7 @@ public void registerResourceSettings(JobResourceSettings settings) @Override public Set getResourceSettings() { - return _resourceSettings; + return Collections.unmodifiableSet(_resourceSettings); } @Override diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java index accfb9a8d..dea268e15 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java @@ -23,6 +23,7 @@ import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; +import org.labkey.api.sequenceanalysis.run.DockerWrapper; import org.labkey.api.util.FileType; import org.labkey.api.writer.PrintWriters; import org.labkey.sequenceanalysis.SequenceAnalysisModule; @@ -235,118 +236,68 @@ public GLNexusWrapper(Logger logger) super(logger); } - private File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTracker output) throws PipelineJobException + public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker tracker, String binVersion, String configType, SAMSequenceRecord rec, JobContext ctx) throws PipelineJobException { - try - { - if (workingDirectory.equals(input.getParentFile())) - { - return input; - } + DockerWrapper wrapper = new DockerWrapper("ghcr.io/dnanexus-rnd/glnexus:" + binVersion, ctx.getLogger(), ctx); + wrapper.setTmpDir(new File(SequencePipelineService.get().getJavaTempDir())); + wrapper.setWorkingDir(ctx.getWorkingDirectory()); - File local = new File(workingDirectory, input.getName()); - if (!local.exists()) - { - getLogger().debug("Copying file locally: " + input.getPath()); - 
FileUtils.copyFile(input, local); - } - - output.addIntermediateFile(local); - - return local; + File bed = new File(ctx.getWorkingDirectory(), "contig.bed"); + tracker.addIntermediateFile(bed); + try (PrintWriter bedWriter = PrintWriters.getPrintWriter(bed)) + { + // Create a single-contig BED file: + bedWriter.println(rec.getSequenceName() + "\t0\t" + rec.getSequenceLength()); } catch (IOException e) { throw new PipelineJobException(e); } - } - public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker tracker, String binVersion, String configType, SAMSequenceRecord rec, JobContext ctx) throws PipelineJobException - { - File workDir = outputVcf.getParentFile(); - tracker.addIntermediateFile(outputVcf); - tracker.addIntermediateFile(new File(outputVcf.getPath() + ".tbi")); + List dockerArgs = new ArrayList<>(); + dockerArgs.add("glnexus_cli"); + dockerArgs.add("--config " + configType); - List gvcfsLocal = new ArrayList<>(); - for (File f : inputGvcfs) + Integer maxRam = SequencePipelineService.get().getMaxRam(); + if (maxRam != null) { - gvcfsLocal.add(ensureLocalCopy(f, workDir, tracker)); - ensureLocalCopy(new File(f.getPath() + ".tbi"), workDir, tracker); + dockerArgs.add("--mem-gbytes " + maxRam); } - File localBashScript = new File(workDir, "docker.sh"); - tracker.addIntermediateFile(localBashScript); - - File bed = new File(workDir, "contig.bed"); - tracker.addIntermediateFile(bed); + dockerArgs.add("--bed " + bed.getPath()); + dockerArgs.add("--trim-uncalled-alleles"); - try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript);PrintWriter bedWriter = PrintWriters.getPrintWriter(bed)) + Integer maxThreads = SequencePipelineService.get().getMaxThreads(getLogger()); + if (maxThreads != null) { - writer.println("#!/bin/bash"); - writer.println("set -x"); - writer.println("WD=`pwd`"); - writer.println("HOME=`echo ~/`"); - writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("$DOCKER 
pull ghcr.io/dnanexus-rnd/glnexus:" + binVersion); - writer.println("$DOCKER run --rm=true \\"); - writer.println("\t-v \"${WD}:/work\" \\"); - writer.println("\t-v \"${HOME}:/homeDir\" \\"); - ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); - writer.println("\t -w /work \\"); - if (!StringUtils.isEmpty(System.getenv("TMPDIR"))) - { - writer.println("\t-v \"${TMPDIR}:/tmp\" \\"); - } + dockerArgs.add("--threads " + maxThreads); + } - Integer maxRam = SequencePipelineService.get().getMaxRam(); - if (maxRam != null) - { - writer.println("\t--memory='" + maxRam + "g' \\"); - } - writer.println("\tghcr.io/dnanexus-rnd/glnexus:" + binVersion + " \\"); - writer.println("\tglnexus_cli \\"); - writer.println("\t--config " + configType + " \\"); - writer.println("\t--bed /work/" + bed.getName() + " \\"); - writer.println("\t--trim-uncalled-alleles \\"); + inputGvcfs.forEach(f -> { + dockerArgs.add(f.getPath()); + }); - if (maxRam != null) - { - writer.println("\t--mem-gbytes " + maxRam + "\\"); - } + File bcftools = BcftoolsRunner.getBcfToolsPath(); + File bgzip = BgzipRunner.getExe(); + dockerArgs.add(" | " + bcftools.getPath() + " view | " + bgzip.getPath() + " -c > " + outputVcf.getPath()); - Integer maxThreads = SequencePipelineService.get().getMaxThreads(getLogger()); - if (maxThreads != null) + // Command will fail if this exists: + File dbDir = new File (ctx.getWorkingDirectory(), "GLnexus.DB"); + tracker.addIntermediateFile(dbDir); + if (dbDir.exists()) + { + getLogger().debug("Deleting pre-existing GLnexus.DB dir"); + try { - writer.println("\t--threads " + maxThreads + " \\"); + FileUtils.deleteDirectory(dbDir); } - - gvcfsLocal.forEach(f -> { - writer.println("\t/work/" + f.getName() + " \\"); - }); - - File bcftools = BcftoolsRunner.getBcfToolsPath(); - File bgzip = BgzipRunner.getExe(); - writer.println("\t| " + bcftools.getPath() + " view | " + bgzip.getPath() + " -c > " + outputVcf.getPath()); - - // Command will fail if this exists: - File 
dbDir = new File (outputVcf.getParentFile(), "GLnexus.DB"); - tracker.addIntermediateFile(dbDir); - if (dbDir.exists()) + catch (IOException e) { - getLogger().debug("Deleting pre-existing GLnexus.DB dir"); - FileUtils.deleteDirectory(dbDir); + throw new PipelineJobException(e); } - - // Create a single-contig BED file: - bedWriter.println(rec.getSequenceName() + "\t0\t" + rec.getSequenceLength()); - } - catch (IOException e) - { - throw new PipelineJobException(e); } - setWorkingDir(workDir); - execute(Arrays.asList("/bin/bash", localBashScript.getPath())); + wrapper.executeWithDocker(dockerArgs, ctx.getWorkingDirectory(), tracker, inputGvcfs); if (!outputVcf.exists()) { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java index 32dc295ed..0f0db420c 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java @@ -58,6 +58,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; /** * Created by bimber on 8/31/2016. @@ -193,6 +194,20 @@ public Collection getDockerVolumes() public void setDockerVolumes(Collection dockerVolumes) { + // TODO: this is for legacy jobs that included the -v arg. 
Eventually remove: + if (dockerVolumes.stream().anyMatch(x -> x.startsWith("-v"))) + { + dockerVolumes = dockerVolumes.stream().map(x -> { + if (x.startsWith("-v")) + { + x = x.split(":")[1]; + x = x.substring( 1, x.length() - 1); + } + + return x; + }).collect(Collectors.toSet()); + } + _dockerVolumes = dockerVolumes; } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java index 9cc33e928..3f8968831 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java @@ -1,7 +1,5 @@ package org.labkey.sequenceanalysis.run.analysis; -import org.apache.commons.io.FileUtils; -import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Logger; import org.json.JSONObject; import org.labkey.api.pipeline.PipelineJobException; @@ -20,13 +18,10 @@ import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; +import org.labkey.api.sequenceanalysis.run.DockerWrapper; import org.labkey.api.util.FileUtil; -import org.labkey.api.writer.PrintWriters; -import org.labkey.sequenceanalysis.util.SequenceUtil; import java.io.File; -import java.io.IOException; -import java.io.PrintWriter; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -194,32 +189,6 @@ public DeepVariantWrapper(Logger logger) super(logger); } - private File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTracker output) throws PipelineJobException - { - try - { - if (workingDirectory.equals(input.getParentFile())) - { - return input; - } - - File local = new File(workingDirectory, input.getName()); - if (!local.exists()) - { - 
getLogger().debug("Copying file locally: " + input.getPath()); - FileUtils.copyFile(input, local); - } - - output.addIntermediateFile(local); - - return local; - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - } - public void execute(File inputBam, File refFasta, File outputGvcf, boolean retainVcf, PipelineOutputTracker tracker, String binVersion, List extraArgs, PipelineContext ctx) throws PipelineJobException { File workDir = outputGvcf.getParentFile(); @@ -230,24 +199,17 @@ public void execute(File inputBam, File refFasta, File outputGvcf, boolean retai tracker.addIntermediateFile(new File(outputVcf.getPath() + ".tbi")); } - File inputBamLocal = ensureLocalCopy(inputBam, workDir, tracker); - ensureLocalCopy(SequenceUtil.getExpectedIndex(inputBam), workDir, tracker); - - File refFastaLocal = ensureLocalCopy(refFasta, workDir, tracker); - ensureLocalCopy(new File(refFasta.getPath() + ".fai"), workDir, tracker); - ensureLocalCopy(new File(FileUtil.getBaseName(refFasta.getPath()) + ".dict"), workDir, tracker); + List inputFiles = new ArrayList<>(); - File localBashScript = new File(workDir, "docker.sh"); - File dockerBashScript = new File(workDir, "dockerRun.sh"); - tracker.addIntermediateFile(localBashScript); - tracker.addIntermediateFile(dockerBashScript); + inputFiles.add(inputBam); + inputFiles.add(refFasta); List bashArgs = new ArrayList<>(Arrays.asList("/opt/deepvariant/bin/run_deepvariant")); bashArgs.add("--make_examples_extra_args='normalize_reads=true'"); - bashArgs.add("--ref=/work/" + refFastaLocal.getName()); - bashArgs.add("--reads=/work/" + inputBamLocal.getName()); - bashArgs.add("--output_gvcf=/work/" + outputGvcf.getName()); - bashArgs.add("--output_vcf=/work/" + outputVcf.getName()); + bashArgs.add("--ref=" + refFasta.getPath()); + bashArgs.add("--reads=" + inputBam.getPath()); + bashArgs.add("--output_gvcf=" + outputGvcf.getPath()); + bashArgs.add("--output_vcf=" + outputVcf.getPath()); Integer maxThreads = 
SequencePipelineService.get().getMaxThreads(getLogger()); if (maxThreads != null) { @@ -259,50 +221,9 @@ public void execute(File inputBam, File refFasta, File outputGvcf, boolean retai bashArgs.addAll(extraArgs); } - try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript); PrintWriter dockerWriter = PrintWriters.getPrintWriter(dockerBashScript)) - { - writer.println("#!/bin/bash"); - writer.println("set -x"); - writer.println("WD=`pwd`"); - writer.println("HOME=`echo ~/`"); - writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("$DOCKER pull google/deepvariant:" + binVersion); - writer.println("$DOCKER run --rm=true \\"); - writer.println("\t-v \"${WD}:/work\" \\"); - writer.println("\t-v \"${HOME}:/homeDir\" \\"); - ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); - if (!StringUtils.isEmpty(System.getenv("TMPDIR"))) - { - writer.println("\t-v \"${TMPDIR}:/tmp\" \\"); - } - writer.println("\t--entrypoint /bin/bash \\"); - writer.println("\t-w /work \\"); - Integer maxRam = SequencePipelineService.get().getMaxRam(); - if (maxRam != null) - { - writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM=" + maxRam + " \\"); - writer.println("\t--memory='" + maxRam + "g' \\"); - } - writer.println("\tgoogle/deepvariant:" + binVersion + " \\"); - writer.println("\t/work/" + dockerBashScript.getName()); - writer.println("EXIT_CODE=$?"); - writer.println("echo 'Docker run exit code: '$EXIT_CODE"); - writer.println("exit $EXIT_CODE"); - - dockerWriter.println("#!/bin/bash"); - dockerWriter.println("set -x"); - dockerWriter.println(StringUtils.join(bashArgs, " ")); - dockerWriter.println("EXIT_CODE=$?"); - dockerWriter.println("echo 'Exit code: '$?"); - dockerWriter.println("exit $EXIT_CODE"); - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - - setWorkingDir(workDir); - execute(Arrays.asList("/bin/bash", localBashScript.getPath())); + DockerWrapper wrapper = new 
DockerWrapper("google/deepvariant:" + binVersion, ctx.getLogger(), ctx); + wrapper.setEntryPoint("/bin/bash"); + wrapper.executeWithDocker(bashArgs, ctx.getWorkingDirectory(), tracker, inputFiles); if (!outputGvcf.exists()) { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java index 0e815fe48..fe4b571c3 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java @@ -3,7 +3,6 @@ import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Interval; import htsjdk.variant.variantcontext.VariantContext; -import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Logger; import org.json.JSONArray; @@ -31,11 +30,9 @@ import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; -import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; -import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; +import org.labkey.api.sequenceanalysis.run.DockerWrapper; import org.labkey.api.util.FileUtil; import org.labkey.api.util.PageFlowUtil; -import org.labkey.api.writer.PrintWriters; import org.labkey.sequenceanalysis.SequenceAnalysisModule; import org.labkey.sequenceanalysis.SequenceAnalysisSchema; import org.labkey.sequenceanalysis.util.SequenceUtil; @@ -43,7 +40,6 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.io.PrintWriter; import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; @@ -141,67 +137,12 @@ public static File getJsonFile(File outputDir, File consensusFasta) public static File runNextClade(File consensusFasta, Logger 
log, PipelineOutputTracker tracker, File outputDir, PipelineContext ctx) throws PipelineJobException { - if (!consensusFasta.getParentFile().equals(outputDir)) - { - try - { - File consensusFastaLocal = new File(outputDir, consensusFasta.getName()); - log.info("Copying FASTA locally: " + consensusFastaLocal.getPath()); - FileUtils.copyFile(consensusFasta, consensusFastaLocal); - tracker.addIntermediateFile(consensusFastaLocal); - consensusFasta = consensusFastaLocal; - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - } - File jsonFile = getJsonFile(outputDir, consensusFasta); - File localBashScript = new File(outputDir, "dockerWrapper.sh"); - try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript)) - { - writer.println("#!/bin/bash"); - writer.println("set -x"); - writer.println("WD=`pwd`"); - writer.println("HOME=`echo ~/`"); - - writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("$DOCKER pull nextstrain/nextclade:latest"); - writer.println("$DOCKER run --rm=true \\"); - - if (SequencePipelineService.get().getMaxThreads(log) != null) - { - writer.println("\t-e SEQUENCEANALYSIS_MAX_THREADS \\"); - } - - Integer maxRam = SequencePipelineService.get().getMaxRam(); - if (maxRam != null) - { - writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM \\"); - writer.println("\t--memory='" + maxRam + "g' \\"); - } - - writer.println("\t-v \"${WD}:/work\" \\"); - ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); - writer.println("\t-w /work \\"); - writer.println("\tnextstrain/nextclade:latest \\"); - writer.println("\t/bin/bash -c \"nextclade dataset get --name='sars-cov-2' --output-dir='/work/data/sars-cov-2';nextclade run --input-dataset='/work/data/sars-cov-2' --output-json '/work/" + jsonFile.getName() + "' '" + consensusFasta.getName() + "'\" && rm -Rf /work/data"); - writer.println(""); - writer.println("echo 'Bash script complete'"); - writer.println(""); - } - catch 
(IOException e) - { - throw new PipelineJobException(e); - } - - SimpleScriptWrapper rWrapper = new SimpleScriptWrapper(log); - rWrapper.setWorkingDir(outputDir); - rWrapper.execute(Arrays.asList("/bin/bash", localBashScript.getName())); + DockerWrapper wrapper = new DockerWrapper("nextstrain/nextclade:latest", ctx.getLogger(), ctx); + File dataDir = new File(outputDir, "data"); - tracker.addIntermediateFile(localBashScript); + wrapper.executeWithDocker(Arrays.asList("/bin/bash", "-c \"nextclade dataset get --name='sars-cov-2' --output-dir='" + dataDir.getPath() + "/sars-cov-2';nextclade run --input-dataset='" + dataDir.getPath() + "/sars-cov-2' --output-json '" + jsonFile.getPath() + "' '" + consensusFasta.getPath() + "'\" && rm -Rf " + dataDir), ctx.getWorkingDirectory(), tracker); if (!jsonFile.exists()) { @@ -217,7 +158,7 @@ private static JSONObject parseNextClade(File jsonFile, Logger log) throws Pipel { JSONObject results = new JSONObject(IOUtil.readFully(is)); JSONArray samples = results.getJSONArray("results"); - if (samples.length() == 0) + if (samples.isEmpty()) { log.info("No samples found in NextClade JSON, this probably means no clade was assigned"); return null; diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java index c79811449..2cccb8b9e 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java @@ -30,19 +30,16 @@ import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker; import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; -import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; -import 
org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; +import org.labkey.api.sequenceanalysis.run.DockerWrapper; import org.labkey.api.util.FileUtil; import org.labkey.api.util.PageFlowUtil; -import org.labkey.api.writer.PrintWriters; import org.labkey.sequenceanalysis.SequenceAnalysisModule; import org.labkey.sequenceanalysis.SequenceAnalysisSchema; import org.labkey.sequenceanalysis.util.SequenceUtil; import java.io.File; import java.io.IOException; -import java.io.PrintWriter; import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; @@ -266,65 +263,9 @@ public static File getRenamedPangolinOutput(File consensusFasta, PANGO_MODE mode private static File runUsingDocker(File outputDir, Logger log, File consensusFasta, PipelineOutputTracker tracker, List extraArgs, PipelineContext ctx) throws PipelineJobException { - if (!consensusFasta.getParentFile().equals(outputDir)) - { - try - { - File consensusFastaLocal = new File(outputDir, consensusFasta.getName()); - log.info("Copying FASTA locally: " + consensusFastaLocal.getPath()); - FileUtils.copyFile(consensusFasta, consensusFastaLocal); - tracker.addIntermediateFile(consensusFastaLocal); - consensusFasta = consensusFastaLocal; - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - } - - File localBashScript = new File(outputDir, "dockerWrapper.sh"); - try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript)) - { - writer.println("#!/bin/bash"); - writer.println("set -x"); - writer.println("WD=`pwd`"); - writer.println("HOME=`echo ~/`"); - - writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("$DOCKER pull ghcr.io/bimberlabinternal/pangolin:latest"); - writer.println("$DOCKER run --rm=true \\"); - - if (SequencePipelineService.get().getMaxThreads(log) != null) - { - writer.println("\t-e SEQUENCEANALYSIS_MAX_THREADS \\"); - } - - Integer maxRam = SequencePipelineService.get().getMaxRam(); - if (maxRam != 
null) - { - writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM \\"); - writer.println("\t--memory='" + maxRam + "g' \\"); - } - - String extraArgString = extraArgs == null ? "" : " " + StringUtils.join(extraArgs, " "); - writer.println("\t-v \"${WD}:/work\" \\"); - ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); - writer.println("\t-w /work \\"); - writer.println("\tghcr.io/bimberlabinternal/pangolin:latest \\"); - writer.println("\tpangolin" + extraArgString + " '/work/" + consensusFasta.getName() + "'"); - writer.println(""); - writer.println("echo 'Bash script complete'"); - writer.println(""); - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - - SimpleScriptWrapper rWrapper = new SimpleScriptWrapper(log); - rWrapper.setWorkingDir(outputDir); - rWrapper.execute(Arrays.asList("/bin/bash", localBashScript.getName())); - tracker.addIntermediateFile(localBashScript); + DockerWrapper wrapper = new DockerWrapper("ghcr.io/bimberlabinternal/pangolin:latest", ctx.getLogger(), ctx); + String extraArgString = extraArgs == null ? 
"" : " " + StringUtils.join(extraArgs, " "); + wrapper.executeWithDocker(Arrays.asList("pangolin" + extraArgString + " '" + consensusFasta.getPath() + "'"), ctx.getWorkingDirectory(), tracker); File output = new File(outputDir, "lineage_report.csv"); if (!output.exists()) diff --git a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java index e06cdd4ca..ae35280aa 100644 --- a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java +++ b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java @@ -29,6 +29,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -271,7 +272,7 @@ protected boolean hasCompleted() protected void executeR(SequenceOutputHandler.JobContext ctx, File rmd, String outputPrefix) throws PipelineJobException { List lines = new ArrayList<>(); - lines.add("rmarkdown::render(output_file = '" + getExpectedHtmlFile(ctx, outputPrefix).getName() + "', input = '" + rmd.getName() + "', intermediates_dir = '/work')"); + lines.add("rmarkdown::render(output_file = '" + getExpectedHtmlFile(ctx, outputPrefix).getName() + "', input = '" + rmd.getName() + "', intermediates_dir = '" + ctx.getWorkingDirectory() + "')"); lines.add("print('Rmarkdown complete')"); lines.add(""); @@ -287,7 +288,7 @@ protected void executeR(SequenceOutputHandler.JobContext ctx, File rmd, String o seuratThreads = getProvider().getParameterByName(SEURAT_THREADS).extractValue(ctx.getJob(), getProvider(), getStepIdx(), Integer.class, null); } - executeR(ctx, getDockerContainerName(), outputPrefix, lines, seuratThreads, getDockerHomeDir()); + executeR(ctx, getDockerContainerName(), outputPrefix, lines, seuratThreads, getDockerHomeDir(), 
getAdditionalDockerInputs(ctx)); handlePossibleFailure(ctx, outputPrefix); } @@ -299,9 +300,13 @@ protected static SeuratToolParameter getSeuratThreadsParam() }}, null); } - public static void executeR(SequenceOutputHandler.JobContext ctx, String dockerContainerName, String outputPrefix, List lines, @Nullable Integer seuratThreads, @Nullable String dockerHomeDir) throws PipelineJobException + public static void executeR(SequenceOutputHandler.JobContext ctx, String dockerContainerName, String outputPrefix, List lines, @Nullable Integer seuratThreads, @Nullable String alternateUserHomeDir, Collection additionalDockerInputs) throws PipelineJobException { DockerWrapper wrapper = new DockerWrapper(dockerContainerName, ctx.getLogger(), ctx); + if (alternateUserHomeDir != null) + { + wrapper.setAlternateUserHome(alternateUserHomeDir); + } File localRScript = new File(ctx.getOutputDir(), FileUtil.makeLegalName(outputPrefix + ".R").replaceAll(" ", "_")); try (PrintWriter writer = PrintWriters.getPrintWriter(localRScript)) @@ -328,7 +333,7 @@ public static void executeR(SequenceOutputHandler.JobContext ctx, String dockerC wrapper.setTmpDir(tmpDir); wrapper.setWorkingDir(ctx.getOutputDir()); - wrapper.executeWithDocker(Arrays.asList("Rscript", "--vanilla", "'" + localRScript.getName() + "'"), ctx.getWorkingDirectory(), ctx.getFileManager()); + wrapper.executeWithDocker(Arrays.asList("Rscript", "--vanilla", "'" + localRScript.getName() + "'"), ctx.getWorkingDirectory(), ctx.getFileManager(), additionalDockerInputs); localRScript.delete(); } @@ -439,7 +444,7 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List getAdditionalDockerInputs(SequenceOutputHandler.JobContext ctx) throws PipelineJobException + { + return Collections.emptySet(); + } + protected String printInputFile(SeuratObjectWrapper so) { return "'" + so.getFile().getName() + "'"; diff --git a/singlecell/resources/chunks/AppendMetadata.R b/singlecell/resources/chunks/AppendMetadata.R index 
406e02395..a79f3f2c6 100644 --- a/singlecell/resources/chunks/AppendMetadata.R +++ b/singlecell/resources/chunks/AppendMetadata.R @@ -1,9 +1,10 @@ -if (!file.exists('/homeDir/.netrc')) { - print(list.files('/homeDir')) - stop('Unable to find file: /homeDir/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) } -invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) for (datasetId in names(seuratObjects)) { diff --git a/singlecell/resources/chunks/AppendNimble.R b/singlecell/resources/chunks/AppendNimble.R index 040317e71..df76eaf49 100644 --- a/singlecell/resources/chunks/AppendNimble.R +++ b/singlecell/resources/chunks/AppendNimble.R @@ -1,9 +1,10 @@ -if (!file.exists('/homeDir/.netrc')) { - print(list.files('/homeDir')) - stop('Unable to find file: /homeDir/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) } -invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) # NOTE: this file is created by DownloadAndAppendNimble if there was an error. 
It might exist if a job failed and then was restarted diff --git a/singlecell/resources/chunks/AppendTcr.R b/singlecell/resources/chunks/AppendTcr.R index b4c5c41a6..e8ae8572e 100644 --- a/singlecell/resources/chunks/AppendTcr.R +++ b/singlecell/resources/chunks/AppendTcr.R @@ -1,9 +1,10 @@ -if (!file.exists('/homeDir/.netrc')) { - print(list.files('/homeDir')) - stop('Unable to find file: /homeDir/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) } -invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) for (datasetId in names(seuratObjects)) { diff --git a/singlecell/resources/chunks/AvgExpression.R b/singlecell/resources/chunks/AvgExpression.R index 78426ff12..aacfe01ed 100644 --- a/singlecell/resources/chunks/AvgExpression.R +++ b/singlecell/resources/chunks/AvgExpression.R @@ -1,9 +1,10 @@ -if (!file.exists('/homeDir/.netrc')) { - print(list.files('/homeDir')) - stop('Unable to find file: /homeDir/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) } -invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) GenerateAveragedData <- function(seuratObj, groupFields, addMetadata) { diff --git a/singlecell/resources/chunks/ClassifyTNKByExpression.R b/singlecell/resources/chunks/ClassifyTNKByExpression.R index 2e3f7bb1a..9f19b7fae 100644 --- a/singlecell/resources/chunks/ClassifyTNKByExpression.R +++ b/singlecell/resources/chunks/ClassifyTNKByExpression.R @@ -1,9 +1,10 @@ -if 
(!file.exists('/homeDir/.netrc')) { - print(list.files('/homeDir')) - stop('Unable to find file: /homeDir/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) } -invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) for (datasetId in names(seuratObjects)) { diff --git a/singlecell/resources/chunks/RunConga.R b/singlecell/resources/chunks/RunConga.R index 36091ee4e..5f07adccb 100644 --- a/singlecell/resources/chunks/RunConga.R +++ b/singlecell/resources/chunks/RunConga.R @@ -1,9 +1,10 @@ -if (!file.exists('/homeDir/.netrc')) { - print(list.files('/homeDir')) - stop('Unable to find file: /homeDir/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) } -invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) for (datasetId in names(seuratObjects)) { diff --git a/singlecell/resources/chunks/StudyMetadata.R b/singlecell/resources/chunks/StudyMetadata.R index ee0726402..d3d10453e 100644 --- a/singlecell/resources/chunks/StudyMetadata.R +++ b/singlecell/resources/chunks/StudyMetadata.R @@ -1,9 +1,10 @@ -if (!file.exists('/homeDir/.netrc')) { - print(list.files('/homeDir')) - stop('Unable to find file: /homeDir/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) } -invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) 
+invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) for (datasetId in names(seuratObjects)) { diff --git a/singlecell/resources/chunks/SummarizeTCellActivation.R b/singlecell/resources/chunks/SummarizeTCellActivation.R index ae7fa52d8..167ea4b6a 100644 --- a/singlecell/resources/chunks/SummarizeTCellActivation.R +++ b/singlecell/resources/chunks/SummarizeTCellActivation.R @@ -1,9 +1,10 @@ -if (!file.exists('/homeDir/.netrc')) { - print(list.files('/homeDir')) - stop('Unable to find file: /homeDir/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) } -invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) for (datasetId in names(seuratObjects)) { diff --git a/singlecell/resources/chunks/TrainCelltypist.R b/singlecell/resources/chunks/TrainCelltypist.R index 543c51f85..6afba85cb 100644 --- a/singlecell/resources/chunks/TrainCelltypist.R +++ b/singlecell/resources/chunks/TrainCelltypist.R @@ -6,4 +6,4 @@ datasetId <- names(seuratObjects)[[1]] printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) -RIRA::TrainCellTypist(seuratObj, labelField = labelField, minCellsPerClass = minCellsPerClass, excludedClasses = excludedClasses, modelFile = modelFile, featureInclusionList = featureInclusionList, featureExclusionList = featureExclusionList, tempFileLocation = '/work') \ No newline at end of file +RIRA::TrainCellTypist(seuratObj, labelField = labelField, minCellsPerClass = minCellsPerClass, excludedClasses = excludedClasses, modelFile = modelFile, featureInclusionList = featureInclusionList, featureExclusionList = featureExclusionList, tempFileLocation = 
Sys.getenv('WORK_DIR')) \ No newline at end of file diff --git a/singlecell/resources/chunks/UpdateSeuratPrototype.R b/singlecell/resources/chunks/UpdateSeuratPrototype.R index e9af18a97..bbcfc7071 100644 --- a/singlecell/resources/chunks/UpdateSeuratPrototype.R +++ b/singlecell/resources/chunks/UpdateSeuratPrototype.R @@ -1,9 +1,10 @@ -if (!file.exists('/homeDir/.netrc')) { - print(list.files('/homeDir')) - stop('Unable to find file: /homeDir/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) } -invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) for (datasetId in names(seuratObjects)) { diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index 60d739fd0..a79f9bf27 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -31,14 +31,16 @@ import org.labkey.api.security.User; import org.labkey.api.sequenceanalysis.SequenceOutputFile; import org.labkey.api.sequenceanalysis.model.Readset; +import org.labkey.api.sequenceanalysis.pipeline.DefaultPipelineStepOutput; import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker; +import org.labkey.api.sequenceanalysis.pipeline.PipelineStepOutput; import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import 
org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; -import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; +import org.labkey.api.sequenceanalysis.run.DockerWrapper; import org.labkey.api.singlecell.CellHashingService; import org.labkey.api.singlecell.model.CDNA_Library; import org.labkey.api.singlecell.model.Sample; @@ -1142,50 +1144,6 @@ else if ("Negative".equals(line[htoClassIdx])) } } - public File ensureLocalCopy(File input, File outputDir, Logger log, Set toDelete) throws PipelineJobException - { - if (!outputDir.equals(input.getParentFile())) - { - try - { - //needed for docker currently - log.debug("Copying file to working directory: " + input.getPath()); - File dest = new File(outputDir, input.getName()); - if (dest.exists()) - { - log.debug("deleting existing folder: " + dest.getPath()); - if (input.isDirectory()) - { - FileUtils.deleteDirectory(dest); - } - else - { - dest.delete(); - } - } - - if (input.isDirectory()) - { - FileUtils.copyDirectory(input, dest); - } - else - { - FileUtils.copyFile(input, dest); - } - - toDelete.add(dest); - - return dest; - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - } - - return input; - } - private File getExpectedCallsFile(File outputDir, String basename) { return new File(outputDir, basename + CALL_EXTENSION); @@ -1200,24 +1158,20 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St { log.debug("generating final calls from folder: " + citeSeqCountOutDir.getPath()); - Set toDelete = new HashSet<>(); - - SimpleScriptWrapper rWrapper = new SimpleScriptWrapper(log); - rWrapper.setWorkingDir(outputDir); + List inputFiles = new ArrayList<>(); File molInfo = getMolInfoFileFromCounts(citeSeqCountOutDir); if (!molInfo.exists()) { throw new PipelineJobException("File not found, cannot calculate saturation: " + molInfo.getPath()); } - - molInfo = ensureLocalCopy(molInfo, outputDir, log, toDelete); + inputFiles.add(molInfo); // h5 file used by 
demuxEM/demuxmix: - File h5 = null; - if (parameters.h5File != null) + File h5 = parameters.h5File; + if (h5 != null) { - h5 = ensureLocalCopy(parameters.h5File, outputDir, log, toDelete); + inputFiles.add(h5); } if (CALLING_METHOD.requiresH5(parameters.methods) && h5 == null) @@ -1225,18 +1179,14 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St throw new PipelineJobException("No h5 file provided, but demuxEM/demuxmix was specified"); } - citeSeqCountOutDir = ensureLocalCopy(citeSeqCountOutDir, outputDir, log, toDelete); - - File cellBarcodeWhitelistFile = parameters.cellBarcodeWhitelistFile; - if (cellBarcodeWhitelistFile != null) + if (citeSeqCountOutDir != null) { - cellBarcodeWhitelistFile = ensureLocalCopy(cellBarcodeWhitelistFile, outputDir, log, toDelete); - } - else - { - log.debug("No cell barcode whitelist provided"); + inputFiles.add(citeSeqCountOutDir); } + File cellBarcodeWhitelistFile = parameters.cellBarcodeWhitelistFile; + inputFiles.add(cellBarcodeWhitelistFile); + File htmlFile = new File(outputDir, basename + ".html"); File localHtml = new File(localPipelineDir, htmlFile.getName()); @@ -1266,7 +1216,7 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St { List methodNames = parameters.methods.stream().map(Enum::name).collect(Collectors.toList()); List consensusMethodNames = parameters.consensusMethods == null ? Collections.emptyList() : parameters.consensusMethods.stream().map(Enum::name).collect(Collectors.toList()); - String cellbarcodeWhitelist = cellBarcodeWhitelistFile != null ? "'/work/" + cellBarcodeWhitelistFile.getName() + "'" : "NULL"; + String cellbarcodeWhitelist = cellBarcodeWhitelistFile != null ? "'" + cellBarcodeWhitelistFile.getPath() + "'" : "NULL"; Set allowableBarcodes = parameters.getAllowableBarcodeNames(); String allowableBarcodeParam = allowableBarcodes != null ? 
"c('" + StringUtils.join(allowableBarcodes, "','") + "')" : "NULL"; @@ -1274,14 +1224,14 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St String skipNormalizationQcString = parameters.skipNormalizationQc ? "TRUE" : "FALSE"; String keepMarkdown = parameters.keepMarkdown ? "TRUE" : "FALSE"; String doTSNE = parameters.doTSNE ? "TRUE" : "FALSE"; - String h5String = h5 == null ? "" : ", rawFeatureMatrixH5 = '/work/" + h5.getName() + "'"; + String h5String = h5 == null ? "" : ", rawFeatureMatrixH5 = '" + h5.getPath() + "'"; String consensusMethodString = consensusMethodNames.isEmpty() ? "" : ", methodsForConsensus = c('" + StringUtils.join(consensusMethodNames, "','") + "')"; - writer.println("f <- cellhashR::CallAndGenerateReport(rawCountData = '/work/" + citeSeqCountOutDir.getName() + "'" + h5String + - ", molInfoFile = '/work/" + molInfo.getName() + "'" + - ", reportFile = '/work/" + htmlFile.getName() + "'" + - ", callFile = '/work/" + callsFile.getName() + "'" + - ", metricsFile = '/work/" + metricsFile.getName() + "'" + - ", rawCountsExport = '/work/" + countFile.getName() + "'" + + writer.println("f <- cellhashR::CallAndGenerateReport(rawCountData = '" + citeSeqCountOutDir.getPath() + "'" + h5String + + ", molInfoFile = '" + molInfo.getPath() + "'" + + ", reportFile = '" + htmlFile.getPath() + "'" + + ", callFile = '" + callsFile.getPath() + "'" + + ", metricsFile = '" + metricsFile.getPath() + "'" + + ", rawCountsExport = '" + countFile.getPath() + "'" + ", cellbarcodeWhitelist = " + cellbarcodeWhitelist + ", barcodeWhitelist = " + allowableBarcodeParam + ", title = '" + parameters.getReportTitle() + "'" + @@ -1302,42 +1252,13 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St throw new PipelineJobException(e); } - File localBashScript = new File(outputDir, "generateCallsDockerWrapper.sh"); - try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript)) - { - 
writer.println("#!/bin/bash"); - writer.println("set -x"); - writer.println("WD=`pwd`"); - writer.println("HOME=`echo ~/`"); - - writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("$DOCKER pull ghcr.io/bimberlab/cellhashr:latest"); - writer.println("$DOCKER run --rm=true \\"); - if (SequencePipelineService.get().getMaxRam() != null) - { - writer.println("\t--memory=" + SequencePipelineService.get().getMaxRam() + "g \\"); - writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM \\"); - } - - if (SequencePipelineService.get().getMaxThreads(log) != null) - { - writer.println("\t-e SEQUENCEANALYSIS_MAX_THREADS \\"); - } + DockerWrapper wrapper = new DockerWrapper("ghcr.io/bimberlab/cellhashr:latest", ctx.getLogger(), ctx); + wrapper.addToEnvironment("CELLHASHR_DEBUG", "1"); - writer.println("\t-e CELLHASHR_DEBUG=1 \\"); - writer.println("\t-v \"${WD}:/work\" \\"); - ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); - writer.println("\t-v \"${HOME}:/homeDir\" \\"); - writer.println("\t-w /work \\"); - writer.println("\tghcr.io/bimberlab/cellhashr:latest \\"); - writer.println("\tRscript --vanilla " + localRScript.getName()); - } - catch (IOException e) - { - throw new PipelineJobException(e); - } + PipelineStepOutput output = new DefaultPipelineStepOutput(); + wrapper.executeWithDocker(Arrays.asList("Rscript", "--vanilla", localRScript.getPath()), ctx.getWorkingDirectory(), output, inputFiles); + output.getIntermediateFiles().forEach(File::delete); - rWrapper.execute(Arrays.asList("/bin/bash", localBashScript.getName())); if (!htmlFile.exists()) { throw new PipelineJobException("Unable to find HTML file: " + htmlFile.getPath()); @@ -1407,29 +1328,8 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St throw new PipelineJobException("Unable to find HTO calls file: " + callsFile.getPath()); } - localBashScript.delete(); localRScript.delete(); - try - { - for (File f : toDelete) - { - 
log.debug("deleting local copy: " + f.getPath()); - if (f.isDirectory()) - { - FileUtils.deleteDirectory(f); - } - else - { - f.delete(); - } - } - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - return callsFile; } diff --git a/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java b/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java index 628967c06..4a4f5aec0 100644 --- a/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java +++ b/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java @@ -603,7 +603,7 @@ else if (step.createsSeuratObjects()) ctx.getJob().setStatus(PipelineJob.TaskStatus.running, "Creating Final HTML Report"); File finalHtml = new File(ctx.getOutputDir(), "finalHtml.html"); List lines = new ArrayList<>(); - lines.add("rmarkdown::render(output_file = '" + finalHtml.getName() + "', input = '" + finalMarkdownFile.getName() + "', intermediates_dir = '/work')"); + lines.add("rmarkdown::render(output_file = '" + finalHtml.getName() + "', input = '" + finalMarkdownFile.getName() + "', intermediates_dir = '" + ctx.getWorkingDirectory() + "')"); AbstractSingleCellPipelineStep.executeR(ctx, AbstractCellMembraneStep.CONTAINER_NAME, "pandoc", lines, null, null); _resumer.getFileManager().addIntermediateFile(finalMarkdownFile); _resumer.getFileManager().addIntermediateFiles(_resumer.getMarkdownsInOrder()); diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PredictScTour.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PredictScTour.java index cc5740250..dd1d7af87 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PredictScTour.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PredictScTour.java @@ -12,7 +12,10 @@ import java.io.File; import java.io.IOException; import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; import java.util.List; 
+import java.util.Set; public class PredictScTour extends AbstractRiraStep { @@ -51,25 +54,22 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List getAdditionalDockerInputs(SequenceOutputHandler.JobContext ctx) throws PipelineJobException + { + Integer fileId = getProvider().getParameterByName("modelFileId").extractValue(ctx.getJob(), getProvider(), getStepIdx(), Integer.class); + if (fileId == null) + { + throw new PipelineJobException("Missing value for modelFileId param"); + } + + return Collections.singleton(ctx.getSequenceSupport().getCachedData(fileId)); + } } diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunCelltypistCustomModel.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunCelltypistCustomModel.java index c1769415a..795664b89 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunCelltypistCustomModel.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunCelltypistCustomModel.java @@ -1,6 +1,5 @@ package org.labkey.singlecell.pipeline.singlecell; -import org.apache.commons.io.FileUtils; import org.json.JSONObject; import org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider; @@ -12,8 +11,9 @@ import org.labkey.api.util.PageFlowUtil; import java.io.File; -import java.io.IOException; import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; import java.util.List; public class RunCelltypistCustomModel extends AbstractRiraStep @@ -94,25 +94,22 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List getAdditionalDockerInputs(SequenceOutputHandler.JobContext ctx) throws PipelineJobException + { + Integer fileId = getProvider().getParameterByName("modelFileId").extractValue(ctx.getJob(), getProvider(), getStepIdx(), Integer.class); + if (fileId == null) + { + throw new PipelineJobException("Missing value for modelFileId param"); + } + + 
return Collections.singleton(ctx.getSequenceSupport().getCachedData(fileId)); + } } diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/TrainCelltypist.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/TrainCelltypist.java index f4dfecc84..c98fd04c1 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/TrainCelltypist.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/TrainCelltypist.java @@ -134,7 +134,7 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List Date: Tue, 3 Dec 2024 11:17:44 -0800 Subject: [PATCH 27/53] Missed with prior commit --- .../singlecell/analysis/AbstractSingleCellHandler.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java b/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java index 4a4f5aec0..5c63eed6f 100644 --- a/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java +++ b/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java @@ -139,7 +139,7 @@ public boolean doRunLocal() public Collection getAllowableActionNames() { Set allowableNames = new HashSet<>(); - for (PipelineStepProvider provider: SequencePipelineService.get().getProviders(SingleCellStep.class)) + for (PipelineStepProvider provider: SequencePipelineService.get().getProviders(SingleCellStep.class)) { allowableNames.add(provider.getLabel()); } @@ -604,7 +604,7 @@ else if (step.createsSeuratObjects()) File finalHtml = new File(ctx.getOutputDir(), "finalHtml.html"); List lines = new ArrayList<>(); lines.add("rmarkdown::render(output_file = '" + finalHtml.getName() + "', input = '" + finalMarkdownFile.getName() + "', intermediates_dir = '" + ctx.getWorkingDirectory() + "')"); - AbstractSingleCellPipelineStep.executeR(ctx, AbstractCellMembraneStep.CONTAINER_NAME, "pandoc", lines, null, null); + 
AbstractSingleCellPipelineStep.executeR(ctx, AbstractCellMembraneStep.CONTAINER_NAME, "pandoc", lines, null, null, null); _resumer.getFileManager().addIntermediateFile(finalMarkdownFile); _resumer.getFileManager().addIntermediateFiles(_resumer.getMarkdownsInOrder()); _resumer.getFileManager().addIntermediateFiles(_resumer.getHtmlFilesInOrder()); @@ -630,7 +630,7 @@ else if (step.createsSeuratObjects()) Integer id = NumberUtils.createInteger(output.getDatasetId()); if (!inputMap.containsKey(id)) { - ctx.getLogger().warn("No input found matching dataset Id: " + output.getDatasetId()); + ctx.getLogger().warn("No input found matching dataset Id: {}", output.getDatasetId()); } else { @@ -641,7 +641,7 @@ else if (step.createsSeuratObjects()) } catch (NumberFormatException e) { - ctx.getLogger().error("Expected dataset ID to be an integer: " + output.getDatasetId()); + ctx.getLogger().error("Expected dataset ID to be an integer: {}", output.getDatasetId()); } } else From b9a01398ea3cb09adf5fa196187b07f9019343f8 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 3 Dec 2024 12:46:13 -0800 Subject: [PATCH 28/53] Change handling of legacy docker volumes --- .../sequenceanalysis/pipeline/SequenceJob.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java index 0f0db420c..5ed503466 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java @@ -188,16 +188,11 @@ public void setFolderFileRoot(PipeRoot folderFileRoot) } public Collection getDockerVolumes() - { - return _dockerVolumes == null ? Collections.emptySet() : Collections.unmodifiableCollection(_dockerVolumes); - } - - public void setDockerVolumes(Collection dockerVolumes) { // TODO: this is for legacy jobs that included the -v arg. 
Eventually remove: - if (dockerVolumes.stream().anyMatch(x -> x.startsWith("-v"))) + if (_dockerVolumes != null && _dockerVolumes.stream().anyMatch(x -> x.startsWith("-v"))) { - dockerVolumes = dockerVolumes.stream().map(x -> { + _dockerVolumes = _dockerVolumes.stream().map(x -> { if (x.startsWith("-v")) { x = x.split(":")[1]; @@ -208,6 +203,11 @@ public void setDockerVolumes(Collection dockerVolumes) }).collect(Collectors.toSet()); } + return _dockerVolumes == null ? Collections.emptySet() : Collections.unmodifiableCollection(_dockerVolumes); + } + + public void setDockerVolumes(Collection dockerVolumes) + { _dockerVolumes = dockerVolumes; } From d923842d0d5b2dc6b02433822ed58246413e6c2d Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 3 Dec 2024 16:35:52 -0800 Subject: [PATCH 29/53] Remove entrypoint from ParagraphStep --- .../org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java | 1 - 1 file changed, 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index 9311366d6..12f6d19eb 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -229,7 +229,6 @@ else if (!svVcf.exists()) DockerWrapper dockerWrapper = new DockerWrapper("ghcr.io/bimberlabinternal/paragraph:latest", ctx.getLogger(), ctx); dockerWrapper.setTmpDir(new File(SequencePipelineService.get().getJavaTempDir())); - dockerWrapper.setEntryPoint("/bin/bash"); List paragraphArgs = new ArrayList<>(); paragraphArgs.add("/opt/paragraph/bin/multigrmpy.py"); From db9085cf8fa57036e9d33fb327accc1bf0bf6e83 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 3 Dec 2024 16:41:28 -0800 Subject: [PATCH 30/53] Add spaces to DockerWrapper --- .../org/labkey/api/sequenceanalysis/run/DockerWrapper.java | 6 +++--- 1 file changed, 3 insertions(+), 3 
deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index 5dcbe3ac6..420371f35 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -108,10 +108,10 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline _environment.put("HOME", _alternateUserHome); } - _ctx.getDockerVolumes().forEach(v -> writer.println("\t-v '" + v + "':'" + v + "'\\")); + _ctx.getDockerVolumes().forEach(v -> writer.println("\t-v '" + v + "':'" + v + "' \\")); if (inputFiles != null) { - inspectInputFiles(inputFiles).forEach(v -> writer.println("\t-v '" + v + "':'" + v + "'\\")); + inspectInputFiles(inputFiles).forEach(v -> writer.println("\t-v '" + v + "':'" + v + "' \\")); } if (_tmpDir != null) @@ -119,7 +119,7 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline // NOTE: getDockerVolumes() should be refactored to remove the -v and this logic should be updated accordingly: if (_ctx.getDockerVolumes().stream().noneMatch(_tmpDir.getPath()::startsWith)) { - writer.println("\t-v \"" + _tmpDir.getPath() + ":/tmp\" \\"); + writer.println("\t-v '" + _tmpDir.getPath() + "':/tmp \\"); } else { From d2a0fe9e5ef020e743f9334c7522f4c448b53e64 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 3 Dec 2024 16:53:41 -0800 Subject: [PATCH 31/53] Declare inputs in ParagraphStep --- .../labkey/sequenceanalysis/run/alignment/ParagraphStep.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index 12f6d19eb..838f00f7e 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -281,7 +281,7 @@ else if (!svVcf.exists()) paragraphArgs.add(threads.toString()); } - dockerWrapper.executeWithDocker(paragraphArgs, ctx.getWorkingDirectory(), ctx.getFileManager()); + dockerWrapper.executeWithDocker(paragraphArgs, ctx.getWorkingDirectory(), ctx.getFileManager(), Arrays.asList(so.getFile(), genomeFasta, svVcf)); File genotypes = new File(paragraphOutDir, "genotypes.vcf.gz"); if (!genotypes.exists()) From 1d931dbf86717357436b9f931805178ebd5ffaea Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 3 Dec 2024 17:26:47 -0800 Subject: [PATCH 32/53] Add quotes to environment vars in DockerWrapper --- .../org/labkey/api/sequenceanalysis/run/DockerWrapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index 420371f35..b1a5208cd 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -146,7 +146,7 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline for (String key : _environment.keySet()) { - writer.println("\t-e " + key + "=" + _environment.get(key) + " \\"); + writer.println("\t-e " + key + "='" + _environment.get(key) + "' \\"); } writer.println("\t" + _containerName + " \\"); writer.println("\t/bin/bash " + dockerBashScript.getPath()); From 97a71bb35092bd1fb0a0371c929035210bf2297a Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 3 Dec 2024 20:09:35 -0800 Subject: [PATCH 33/53] Set scripts executable --- .../org/labkey/api/sequenceanalysis/run/DockerWrapper.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java 
b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index b1a5208cd..5f9e209e1 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -166,6 +166,8 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline throw new PipelineJobException(e); } + localBashScript.setExecutable(true); + dockerBashScript.setExecutable(true); execute(Arrays.asList("/bin/bash", localBashScript.getPath())); } From 70d73612ff3bb7e382982d60f3a9d65525e2ad03 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 4 Dec 2024 03:57:48 -0800 Subject: [PATCH 34/53] Separate docker and process environments --- .../run/AbstractCommandWrapper.java | 2 +- .../sequenceanalysis/run/DockerWrapper.java | 24 ++++++++++++------- .../AbstractSingleCellPipelineStep.java | 2 +- .../singlecell/CellHashingServiceImpl.java | 2 +- .../labkey/singlecell/run/NimbleHelper.java | 2 +- 5 files changed, 19 insertions(+), 13 deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractCommandWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractCommandWrapper.java index 971abf382..2e3e84e2c 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractCommandWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/AbstractCommandWrapper.java @@ -48,7 +48,7 @@ abstract public class AbstractCommandWrapper implements CommandWrapper private boolean _warnNonZeroExits = true; private boolean _throwNonZeroExits = true; private Integer _lastReturnCode = null; - protected final Map _environment = new HashMap<>(); + private final Map _environment = new HashMap<>(); private final List _commandsExecuted = new ArrayList<>(); public AbstractCommandWrapper(@Nullable Logger logger) diff --git 
a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index 5f9e209e1..e9e3ec641 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -15,8 +15,10 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -28,14 +30,13 @@ public class DockerWrapper extends AbstractCommandWrapper private String _entryPoint = null; private boolean _runPrune = true; private String _alternateUserHome = null; + private final Map _dockerEnvironment = new HashMap<>(); public DockerWrapper(String containerName, Logger log, PipelineContext ctx) { super(log); _containerName = containerName; _ctx = ctx; - - _environment.clear(); } public void setAlternateUserHome(String alternateUserHome) @@ -75,7 +76,7 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline { writer.println("#!/bin/bash"); writer.println("set -x"); - writer.println("WD=`pwd`"); + writer.println("set -e"); writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); writer.println("$DOCKER pull " + _containerName); @@ -100,12 +101,12 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline _ctx.getLogger().debug("homeDir already present in docker volumes, will not re-add"); } - _environment.put("USER_HOME", homeDir.getPath()); + _dockerEnvironment.put("USER_HOME", homeDir.getPath()); } if (_alternateUserHome != null) { - _environment.put("HOME", _alternateUserHome); + _dockerEnvironment.put("HOME", _alternateUserHome); } _ctx.getDockerVolumes().forEach(v -> writer.println("\t-v '" + v + "':'" + v + "' \\")); @@ -126,7 +127,7 @@ public void 
executeWithDocker(List containerArgs, File workDir, Pipeline _ctx.getLogger().debug("tmpDir already present in docker volumes, omitting"); } - addToEnvironment("TMPDIR", _tmpDir.getPath()); + addToDockerEnvironment("TMPDIR", _tmpDir.getPath()); } if (_entryPoint != null) @@ -135,7 +136,7 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline } writer.println("\t-w " + workDir.getPath() + " \\"); - addToEnvironment("WORK_DIR", workDir.getPath()); + addToDockerEnvironment("WORK_DIR", workDir.getPath()); Integer maxRam = SequencePipelineService.get().getMaxRam(); if (maxRam != null) @@ -144,9 +145,9 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline writer.println("\t--memory='" + maxRam + "g' \\"); } - for (String key : _environment.keySet()) + for (String key : _dockerEnvironment.keySet()) { - writer.println("\t-e " + key + "='" + _environment.get(key) + "' \\"); + writer.println("\t-e " + key + "='" + _dockerEnvironment.get(key) + "' \\"); } writer.println("\t" + _containerName + " \\"); writer.println("\t/bin/bash " + dockerBashScript.getPath()); @@ -171,6 +172,11 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline execute(Arrays.asList("/bin/bash", localBashScript.getPath())); } + public void addToDockerEnvironment(String key, String value) + { + _dockerEnvironment.put(key, value); + } + private Collection inspectInputFiles(Collection inputFiles) { Set toAdd = inputFiles.stream().map(f -> f.isDirectory() ? 
f : f.getParentFile()).filter(x -> _ctx.getDockerVolumes().stream().noneMatch(x.getPath()::startsWith)).collect(Collectors.toSet()); diff --git a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java index ae35280aa..d24f1abf1 100644 --- a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java +++ b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java @@ -326,7 +326,7 @@ public static void executeR(SequenceOutputHandler.JobContext ctx, String dockerC seuratThreads = maxThreads; } - wrapper.addToEnvironment("SEURAT_MAX_THREADS", seuratThreads.toString()); + wrapper.addToDockerEnvironment("SEURAT_MAX_THREADS", seuratThreads.toString()); } File tmpDir = new File(SequencePipelineService.get().getJavaTempDir()); diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index a79f9bf27..76415b075 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -1253,7 +1253,7 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St } DockerWrapper wrapper = new DockerWrapper("ghcr.io/bimberlab/cellhashr:latest", ctx.getLogger(), ctx); - wrapper.addToEnvironment("CELLHASHR_DEBUG", "1"); + wrapper.addToDockerEnvironment("CELLHASHR_DEBUG", "1"); PipelineStepOutput output = new DefaultPipelineStepOutput(); wrapper.executeWithDocker(Arrays.asList("Rscript", "--vanilla", localRScript.getPath()), ctx.getWorkingDirectory(), output, inputFiles); diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index bc809705e..34bf076e6 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ 
b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -573,7 +573,7 @@ private static boolean runUsingDocker(List nimbleArgs, PipelineStepOutpu wrapper.setTmpDir(null); - wrapper.addToEnvironment("RUST_BACKTRACE", "1"); + wrapper.addToDockerEnvironment("RUST_BACKTRACE", "1"); File doneFile = null; if (resumeString != null) From f9e70c2d309846b2af135c8b053f76551a29aed9 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 4 Dec 2024 04:35:34 -0800 Subject: [PATCH 35/53] For docker --- .../org/labkey/api/sequenceanalysis/run/DockerWrapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index e9e3ec641..e77e5206e 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -169,7 +169,7 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline localBashScript.setExecutable(true); dockerBashScript.setExecutable(true); - execute(Arrays.asList("/bin/bash", localBashScript.getPath())); + execute(Arrays.asList(localBashScript.getPath())); } public void addToDockerEnvironment(String key, String value) From 64f36fa027e533bcaa222385b97eeff9ee46af9f Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 4 Dec 2024 05:52:31 -0800 Subject: [PATCH 36/53] Call docker bash script directly --- .../org/labkey/api/sequenceanalysis/run/DockerWrapper.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index e77e5206e..a91cea5c7 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ 
b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -150,7 +150,7 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline writer.println("\t-e " + key + "='" + _dockerEnvironment.get(key) + "' \\"); } writer.println("\t" + _containerName + " \\"); - writer.println("\t/bin/bash " + dockerBashScript.getPath()); + writer.println("\t" + dockerBashScript.getPath()); writer.println("DOCKER_EXIT_CODE=$?"); writer.println("echo 'Docker run exit code: '$DOCKER_EXIT_CODE"); writer.println("exit $DOCKER_EXIT_CODE"); @@ -169,7 +169,7 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline localBashScript.setExecutable(true); dockerBashScript.setExecutable(true); - execute(Arrays.asList(localBashScript.getPath())); + execute(Arrays.asList("/bin/bash", localBashScript.getPath())); } public void addToDockerEnvironment(String key, String value) From fb63b3507245616cdd502fb004bfc7a80c2c6b88 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 4 Dec 2024 08:02:24 -0800 Subject: [PATCH 37/53] Null check to docker volumes --- .../labkey/sequenceanalysis/SequencePipelineServiceImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java index d1b808338..bfaaaba78 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java @@ -491,7 +491,7 @@ public Collection getDockerVolumes(Container c) } } - return null; + return input; } @Override From 13a6719a355aa2d91029eb0763762ae34dcce93b Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 4 Dec 2024 08:14:10 -0800 Subject: [PATCH 38/53] Add debugging if leidenalg not found --- singlecell/resources/chunks/FindClustersAndDimRedux.R | 7 +++++++ 1 file changed, 7 insertions(+) diff
--git a/singlecell/resources/chunks/FindClustersAndDimRedux.R b/singlecell/resources/chunks/FindClustersAndDimRedux.R index f002dd905..6f78e46b0 100644 --- a/singlecell/resources/chunks/FindClustersAndDimRedux.R +++ b/singlecell/resources/chunks/FindClustersAndDimRedux.R @@ -1,3 +1,10 @@ +if (!reticulate::py_module_available(module = 'leidenalg')) { + logger::log_warn('python leidenalg not found!') + logger::log_warn(paste0('Python available: ', reticulate::py_available())) + logger::log_warn(reticulate::py_config()) + logger::log_warn(paste0('installed packages: ', paste0(reticulate::py_list_packages()$package, collapse = ', '))) +} + for (datasetId in names(seuratObjects)) { printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) From 2105d245672ab62d139907166ebd9e31e6fd3ad0 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 4 Dec 2024 08:27:40 -0800 Subject: [PATCH 39/53] Jackson cannot serialize unmodifiableCollection --- .../org/labkey/sequenceanalysis/pipeline/SequenceJob.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java index 5ed503466..30da83a71 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java @@ -56,6 +56,7 @@ import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -203,12 +204,12 @@ public Collection getDockerVolumes() }).collect(Collectors.toSet()); } - return _dockerVolumes == null ? Collections.emptySet() : Collections.unmodifiableCollection(_dockerVolumes); + return _dockerVolumes == null ? 
Collections.emptySet() : new HashSet<>(_dockerVolumes); } public void setDockerVolumes(Collection dockerVolumes) { - _dockerVolumes = dockerVolumes; + _dockerVolumes = dockerVolumes == null ? null : new HashSet<>(dockerVolumes); } public void setDescription(String description) From 8af4f204fbc264ec1ad57d2908a3dc32060d1d69 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 4 Dec 2024 09:12:52 -0800 Subject: [PATCH 40/53] Jackson cannot serialize unmodifiableCollection --- .../src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java index 30da83a71..b1aff9bcc 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java @@ -138,7 +138,7 @@ public SequenceJob(String providerName, Container c, User u, @Nullable String jo writeParameters(params); _folderFileRoot = c.isWorkbook() ? 
PipelineService.get().findPipelineRoot(c.getParent()) : pipeRoot; - _dockerVolumes = SequencePipelineService.get().getDockerVolumes(c); + _dockerVolumes = new HashSet<>(SequencePipelineService.get().getDockerVolumes(c)); setLogFile(_getLogFile()); writeSupportToDisk(); From 23c3a306152df034638da8874e27913b53ae30b7 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 4 Dec 2024 09:22:11 -0800 Subject: [PATCH 41/53] Improve leidenalg debugging --- .../resources/chunks/FindClustersAndDimRedux.R | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/singlecell/resources/chunks/FindClustersAndDimRedux.R b/singlecell/resources/chunks/FindClustersAndDimRedux.R index 6f78e46b0..1aadac4c8 100644 --- a/singlecell/resources/chunks/FindClustersAndDimRedux.R +++ b/singlecell/resources/chunks/FindClustersAndDimRedux.R @@ -1,8 +1,18 @@ if (!reticulate::py_module_available(module = 'leidenalg')) { logger::log_warn('python leidenalg not found!') logger::log_warn(paste0('Python available: ', reticulate::py_available())) - logger::log_warn(reticulate::py_config()) - logger::log_warn(paste0('installed packages: ', paste0(reticulate::py_list_packages()$package, collapse = ', '))) + logger::log_warn('Python config') + pyConfig <- reticulate::py_config() + for (pn in names(pyConfig)) { + logger::log_warn(paste0(pn, ': ', paste0(pyConfig[[pn]]), collapse = ',')) + } + + logger::log_warn(paste0('pythonpath: ', reticulate::py_config()$pythonpath)) + + logger::log_warn('Python packages:') + for (pn in reticulate::py_list_packages()$package) { + logger::log_warn(pn) + } } for (datasetId in names(seuratObjects)) { From 4501fe3ae9749801ad1f06962c73f82424449f0b Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 4 Dec 2024 11:22:07 -0800 Subject: [PATCH 42/53] Add ability to conditionally use multiseq on large datasets --- .../api/singlecell/CellHashingService.java | 24 +++++++++++-- .../singlecell/CellHashingServiceImpl.java | 36 +++++++++++++++++-- 2 files changed, 56 
insertions(+), 4 deletions(-) diff --git a/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java b/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java index cfd24d865..a6f314dd3 100644 --- a/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java +++ b/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java @@ -153,7 +153,7 @@ public static CellHashingService.CellHashingParameters createFromStep(SequenceOu if (methodStr2 != null) { ret.consensusMethods = extractMethods(methodStr2); - if (!ret.methods.containsAll(ret.consensusMethods)) + if (!new HashSet<>(ret.methods).containsAll(ret.consensusMethods)) { throw new PipelineJobException("All consensusMethods must be present in methods: " + methodStr2); } @@ -189,7 +189,7 @@ public static CellHashingParameters createFromJson(BARCODE_TYPE type, File webse if (ret.consensusMethods != null && !ret.consensusMethods.isEmpty()) { - if (!ret.methods.containsAll(ret.consensusMethods)) + if (!new HashSet<>(ret.methods).containsAll(ret.consensusMethods)) { throw new PipelineJobException("All consensusMethods must be present in methods: " + ret.consensusMethods.stream().map(CALLING_METHOD::name).collect(Collectors.joining(","))); } @@ -326,6 +326,7 @@ public Set getAllowableBarcodeNames() throws PipelineJobException public enum CALLING_METHOD { multiseq(true, false), + multiseqOnLargeData(true, true, false, 10000, "multiseq"), htodemux(false, false), dropletutils(true, true), gmm_demux(true, true), @@ -337,6 +338,8 @@ public enum CALLING_METHOD boolean isDefaultRun; boolean isDefaultConsensus; boolean requiresH5; + int minCells; + String label; CALLING_METHOD(boolean isDefaultRun, boolean isDefaultConsensus) { @@ -344,10 +347,17 @@ public enum CALLING_METHOD } CALLING_METHOD(boolean isDefaultRun, boolean isDefaultConsensus, boolean requiresH5) + { + this(isDefaultRun, isDefaultConsensus, requiresH5, 0, null); + } + + CALLING_METHOD(boolean isDefaultRun, boolean 
isDefaultConsensus, boolean requiresH5, int minCells, String label) { this.isDefaultRun = isDefaultRun; this.isDefaultConsensus = isDefaultConsensus; this.requiresH5 = requiresH5; + this.minCells = minCells; + this.label = label; } public boolean isDefaultRun() @@ -360,6 +370,16 @@ public boolean isDefaultConsensus() return isDefaultConsensus; } + public int getMinCells() + { + return minCells; + } + + public String getLabel() + { + return label == null ? name() : label; + } + public boolean isRequiresH5() { return requiresH5; diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index 76415b075..c5aa873d6 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -69,6 +69,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.labkey.singlecell.run.CellRangerGexCountStep.LOUPE_CATEGORY; @@ -1214,13 +1215,44 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St File localRScript = new File(outputDir, "generateCallsWrapper.R"); try (PrintWriter writer = PrintWriters.getPrintWriter(localRScript)) { - List methodNames = parameters.methods.stream().map(Enum::name).collect(Collectors.toList()); - List consensusMethodNames = parameters.consensusMethods == null ? Collections.emptyList() : parameters.consensusMethods.stream().map(Enum::name).collect(Collectors.toList()); String cellbarcodeWhitelist = cellBarcodeWhitelistFile != null ? 
"'" + cellBarcodeWhitelistFile.getPath() + "'" : "NULL"; + long totalCellBarcodes; + if (cellBarcodeWhitelistFile != null) + { + try (Stream st = Files.lines(cellBarcodeWhitelistFile.toPath())) + { + totalCellBarcodes = st.count(); + } + } + else + { + totalCellBarcodes = 99999L; + } + ctx.getLogger().debug("Total input cell barcodes: " + totalCellBarcodes); Set allowableBarcodes = parameters.getAllowableBarcodeNames(); String allowableBarcodeParam = allowableBarcodes != null ? "c('" + StringUtils.join(allowableBarcodes, "','") + "')" : "NULL"; + List methodNames = parameters.methods.stream().filter(m -> { + if (totalCellBarcodes > m.getMinCells()) + { + ctx.getLogger().debug("Dropping method due to insufficient cells: " + m.name()); + return false; + } + + return true; + }).map(CALLING_METHOD::getLabel).distinct().toList(); + + List consensusMethodNames = parameters.consensusMethods == null ? Collections.emptyList() : parameters.consensusMethods.stream().filter(m -> { + if (totalCellBarcodes > m.getMinCells()) + { + ctx.getLogger().debug("Dropping consensus method due to insufficient cells: " + m.name()); + return false; + } + + return true; + }).map(CALLING_METHOD::getLabel).distinct().toList(); + String skipNormalizationQcString = parameters.skipNormalizationQc ? "TRUE" : "FALSE"; String keepMarkdown = parameters.keepMarkdown ? "TRUE" : "FALSE"; String doTSNE = parameters.doTSNE ? 
"TRUE" : "FALSE"; From 65db68573da037ca5e1f7a1820b6520254000453 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 4 Dec 2024 19:47:22 -0800 Subject: [PATCH 43/53] Expand duplicated prototype query --- .../singlecell/duplicatePrototypes.query.xml | 2 +- .../queries/singlecell/duplicatePrototypes.sql | 17 ++++++++++++++++- .../views/singleCellDataManagement.html | 2 +- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/singlecell/resources/queries/singlecell/duplicatePrototypes.query.xml b/singlecell/resources/queries/singlecell/duplicatePrototypes.query.xml index 20387aff2..96e1ebab2 100644 --- a/singlecell/resources/queries/singlecell/duplicatePrototypes.query.xml +++ b/singlecell/resources/queries/singlecell/duplicatePrototypes.query.xml @@ -2,7 +2,7 @@ - Duplicated Seurat Object Prototypes + Duplicated Seurat Object Prototypes and Loupe Files Readset Id diff --git a/singlecell/resources/queries/singlecell/duplicatePrototypes.sql b/singlecell/resources/queries/singlecell/duplicatePrototypes.sql index 99cd94576..91fe40f5b 100644 --- a/singlecell/resources/queries/singlecell/duplicatePrototypes.sql +++ b/singlecell/resources/queries/singlecell/duplicatePrototypes.sql @@ -2,9 +2,24 @@ SELECT o.readset, min(o.rowId) as minRowId, min(o.analysis_id) as minAnalysisId, - count(*) as totalPrototypes + count(*) as totalPrototypes, + o.category FROM sequenceanalysis.outputfiles o WHERE o.category = 'Seurat Object Prototype' GROUP BY o.readset +HAVING COUNT(*) > 1 + +UNION ALL + +SELECT + o.readset, + min(o.rowId) as minRowId, + min(o.analysis_id) as minAnalysisId, + count(*) as totalPrototypes, + o.category + +FROM sequenceanalysis.outputfiles o +WHERE o.category = '10x Loupe File' +GROUP BY o.readset HAVING COUNT(*) > 1 \ No newline at end of file diff --git a/singlecell/resources/views/singleCellDataManagement.html b/singlecell/resources/views/singleCellDataManagement.html index aafee8cb5..cfda8679e 100644 --- 
a/singlecell/resources/views/singleCellDataManagement.html +++ b/singlecell/resources/views/singleCellDataManagement.html @@ -130,7 +130,7 @@ queryName: 'stalePrototypes' }) }, { - name: 'Duplicate Seurat Object Prototypes', + name: 'Duplicate Seurat Object Prototypes and Loupe Files', url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, { schemaName: 'singlecell', queryName: 'duplicatePrototypes' From e75375fdb28b5744e6f5c921c5637f1515d60d91 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 5 Dec 2024 04:24:29 -0800 Subject: [PATCH 44/53] Make nimble output gzipped results file --- singlecell/src/org/labkey/singlecell/run/NimbleHelper.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index 34bf076e6..fd6fef01d 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -297,6 +297,8 @@ public void doNimbleAlign(File bam, PipelineStepOutput output, Readset rs, Strin { if (SequencePipelineService.get().hasMinLineCount(results, 2)) { + long lineCount = SequencePipelineService.get().getLineCount(results); + _ctx.getLogger().debug("Found {} lines in file {}", lineCount, results.getPath()); throw new PipelineJobException("Unable to find file: " + reportHtml.getPath()); } } @@ -496,7 +498,7 @@ public static File runNimbleReport(File alignResultsGz, int genomeId, PipelineSt reportArgs.add("-i"); reportArgs.add(alignResultsGz.getPath()); - File reportResultsGz = new File(ctx.getWorkingDirectory(), "reportResults." + genomeId + ".txt"); + File reportResultsGz = new File(ctx.getWorkingDirectory(), "reportResults." 
+ genomeId + ".txt.gz"); if (reportResultsGz.exists()) { reportResultsGz.delete(); From ad8f951a394ea95fa8a9cccd230d387faeb8da21 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 5 Dec 2024 11:38:27 -0800 Subject: [PATCH 45/53] Fix error in cell hashing --- .../src/org/labkey/singlecell/CellHashingServiceImpl.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index c5aa873d6..81629915c 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -1234,7 +1234,7 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St String allowableBarcodeParam = allowableBarcodes != null ? "c('" + StringUtils.join(allowableBarcodes, "','") + "')" : "NULL"; List methodNames = parameters.methods.stream().filter(m -> { - if (totalCellBarcodes > m.getMinCells()) + if (totalCellBarcodes < m.getMinCells()) { ctx.getLogger().debug("Dropping method due to insufficient cells: " + m.name()); return false; @@ -1244,7 +1244,7 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St }).map(CALLING_METHOD::getLabel).distinct().toList(); List consensusMethodNames = parameters.consensusMethods == null ? 
Collections.emptyList() : parameters.consensusMethods.stream().filter(m -> { - if (totalCellBarcodes > m.getMinCells()) + if (totalCellBarcodes < m.getMinCells()) { ctx.getLogger().debug("Dropping consensus method due to insufficient cells: " + m.name()); return false; From 68205ae6af173a66e3a2cc48760b1a1ce21203be Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 5 Dec 2024 12:09:32 -0800 Subject: [PATCH 46/53] Fix case in Sys.getenv() --- singlecell/resources/chunks/AppendMetadata.R | 4 ++-- singlecell/resources/chunks/AppendNimble.R | 4 ++-- singlecell/resources/chunks/AppendTcr.R | 4 ++-- singlecell/resources/chunks/AvgExpression.R | 4 ++-- singlecell/resources/chunks/ClassifyTNKByExpression.R | 4 ++-- singlecell/resources/chunks/RunConga.R | 4 ++-- singlecell/resources/chunks/StudyMetadata.R | 4 ++-- singlecell/resources/chunks/SummarizeTCellActivation.R | 4 ++-- singlecell/resources/chunks/TrainCelltypist.R | 2 +- singlecell/resources/chunks/UpdateSeuratPrototype.R | 4 ++-- 10 files changed, 19 insertions(+), 19 deletions(-) diff --git a/singlecell/resources/chunks/AppendMetadata.R b/singlecell/resources/chunks/AppendMetadata.R index a79f3f2c6..addefee43 100644 --- a/singlecell/resources/chunks/AppendMetadata.R +++ b/singlecell/resources/chunks/AppendMetadata.R @@ -1,6 +1,6 @@ -netRc <- paste0(Sys.getEnv('USER_HOME'), '/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') if (!file.exists(netRc)) { - print(list.files(Sys.getEnv('USER_HOME'))) + print(list.files(Sys.getenv('USER_HOME'))) stop(paste0('Unable to find file: ', netRc)) } diff --git a/singlecell/resources/chunks/AppendNimble.R b/singlecell/resources/chunks/AppendNimble.R index df76eaf49..7a913d757 100644 --- a/singlecell/resources/chunks/AppendNimble.R +++ b/singlecell/resources/chunks/AppendNimble.R @@ -1,6 +1,6 @@ -netRc <- paste0(Sys.getEnv('USER_HOME'), '/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') if (!file.exists(netRc)) { - print(list.files(Sys.getEnv('USER_HOME'))) 
+ print(list.files(Sys.getenv('USER_HOME'))) stop(paste0('Unable to find file: ', netRc)) } diff --git a/singlecell/resources/chunks/AppendTcr.R b/singlecell/resources/chunks/AppendTcr.R index e8ae8572e..bc6898617 100644 --- a/singlecell/resources/chunks/AppendTcr.R +++ b/singlecell/resources/chunks/AppendTcr.R @@ -1,6 +1,6 @@ -netRc <- paste0(Sys.getEnv('USER_HOME'), '/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') if (!file.exists(netRc)) { - print(list.files(Sys.getEnv('USER_HOME'))) + print(list.files(Sys.getenv('USER_HOME'))) stop(paste0('Unable to find file: ', netRc)) } diff --git a/singlecell/resources/chunks/AvgExpression.R b/singlecell/resources/chunks/AvgExpression.R index aacfe01ed..5218624f6 100644 --- a/singlecell/resources/chunks/AvgExpression.R +++ b/singlecell/resources/chunks/AvgExpression.R @@ -1,6 +1,6 @@ -netRc <- paste0(Sys.getEnv('USER_HOME'), '/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') if (!file.exists(netRc)) { - print(list.files(Sys.getEnv('USER_HOME'))) + print(list.files(Sys.getenv('USER_HOME'))) stop(paste0('Unable to find file: ', netRc)) } diff --git a/singlecell/resources/chunks/ClassifyTNKByExpression.R b/singlecell/resources/chunks/ClassifyTNKByExpression.R index 9f19b7fae..e7a00f83c 100644 --- a/singlecell/resources/chunks/ClassifyTNKByExpression.R +++ b/singlecell/resources/chunks/ClassifyTNKByExpression.R @@ -1,6 +1,6 @@ -netRc <- paste0(Sys.getEnv('USER_HOME'), '/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') if (!file.exists(netRc)) { - print(list.files(Sys.getEnv('USER_HOME'))) + print(list.files(Sys.getenv('USER_HOME'))) stop(paste0('Unable to find file: ', netRc)) } diff --git a/singlecell/resources/chunks/RunConga.R b/singlecell/resources/chunks/RunConga.R index 5f07adccb..12414779f 100644 --- a/singlecell/resources/chunks/RunConga.R +++ b/singlecell/resources/chunks/RunConga.R @@ -1,6 +1,6 @@ -netRc <- paste0(Sys.getEnv('USER_HOME'), '/.netrc') +netRc <- 
paste0(Sys.getenv('USER_HOME'), '/.netrc') if (!file.exists(netRc)) { - print(list.files(Sys.getEnv('USER_HOME'))) + print(list.files(Sys.getenv('USER_HOME'))) stop(paste0('Unable to find file: ', netRc)) } diff --git a/singlecell/resources/chunks/StudyMetadata.R b/singlecell/resources/chunks/StudyMetadata.R index d3d10453e..fd8d4e931 100644 --- a/singlecell/resources/chunks/StudyMetadata.R +++ b/singlecell/resources/chunks/StudyMetadata.R @@ -1,6 +1,6 @@ -netRc <- paste0(Sys.getEnv('USER_HOME'), '/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') if (!file.exists(netRc)) { - print(list.files(Sys.getEnv('USER_HOME'))) + print(list.files(Sys.getenv('USER_HOME'))) stop(paste0('Unable to find file: ', netRc)) } diff --git a/singlecell/resources/chunks/SummarizeTCellActivation.R b/singlecell/resources/chunks/SummarizeTCellActivation.R index 167ea4b6a..b03c351ee 100644 --- a/singlecell/resources/chunks/SummarizeTCellActivation.R +++ b/singlecell/resources/chunks/SummarizeTCellActivation.R @@ -1,6 +1,6 @@ -netRc <- paste0(Sys.getEnv('USER_HOME'), '/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') if (!file.exists(netRc)) { - print(list.files(Sys.getEnv('USER_HOME'))) + print(list.files(Sys.getenv('USER_HOME'))) stop(paste0('Unable to find file: ', netRc)) } diff --git a/singlecell/resources/chunks/TrainCelltypist.R b/singlecell/resources/chunks/TrainCelltypist.R index 6afba85cb..8f6866d10 100644 --- a/singlecell/resources/chunks/TrainCelltypist.R +++ b/singlecell/resources/chunks/TrainCelltypist.R @@ -6,4 +6,4 @@ datasetId <- names(seuratObjects)[[1]] printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) -RIRA::TrainCellTypist(seuratObj, labelField = labelField, minCellsPerClass = minCellsPerClass, excludedClasses = excludedClasses, modelFile = modelFile, featureInclusionList = featureInclusionList, featureExclusionList = featureExclusionList, tempFileLocation = Sys.getEnv('WORK_DIR')) \ No newline at end of file 
+RIRA::TrainCellTypist(seuratObj, labelField = labelField, minCellsPerClass = minCellsPerClass, excludedClasses = excludedClasses, modelFile = modelFile, featureInclusionList = featureInclusionList, featureExclusionList = featureExclusionList, tempFileLocation = Sys.getenv('WORK_DIR')) \ No newline at end of file diff --git a/singlecell/resources/chunks/UpdateSeuratPrototype.R b/singlecell/resources/chunks/UpdateSeuratPrototype.R index bbcfc7071..a30396d81 100644 --- a/singlecell/resources/chunks/UpdateSeuratPrototype.R +++ b/singlecell/resources/chunks/UpdateSeuratPrototype.R @@ -1,6 +1,6 @@ -netRc <- paste0(Sys.getEnv('USER_HOME'), '/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') if (!file.exists(netRc)) { - print(list.files(Sys.getEnv('USER_HOME'))) + print(list.files(Sys.getenv('USER_HOME'))) stop(paste0('Unable to find file: ', netRc)) } From d2485fc6350f30b4f34b4fc28f210766d6e1a5ef Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 5 Dec 2024 13:47:44 -0800 Subject: [PATCH 47/53] Add placeholder for multiqc install --- SequenceAnalysis/pipeline_code/extra_tools_install.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index c905586b2..33686252b 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -218,3 +218,14 @@ then else echo "Already installed" fi + +if [[ ! -e ${LKTOOLS_DIR}/multiqc || ! 
-z $FORCE_REINSTALL ]]; +then + echo "Cleaning up previous installs" + rm -Rf multiqc* + rm -Rf $LKTOOLS_DIR/multiqc* + + python3 -m pip install --user multiqc +else + echo "Already installed" +fi \ No newline at end of file From 5cc7346848d5724ccd14815347ae5f502a5d8cd9 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 5 Dec 2024 19:08:38 -0800 Subject: [PATCH 48/53] Fix SQL error --- .../resources/queries/singlecell/duplicatePrototypes.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/singlecell/resources/queries/singlecell/duplicatePrototypes.sql b/singlecell/resources/queries/singlecell/duplicatePrototypes.sql index 91fe40f5b..ee5a3c928 100644 --- a/singlecell/resources/queries/singlecell/duplicatePrototypes.sql +++ b/singlecell/resources/queries/singlecell/duplicatePrototypes.sql @@ -7,7 +7,7 @@ SELECT FROM sequenceanalysis.outputfiles o WHERE o.category = 'Seurat Object Prototype' -GROUP BY o.readset +GROUP BY o.readset, o.category HAVING COUNT(*) > 1 UNION ALL @@ -21,5 +21,5 @@ SELECT FROM sequenceanalysis.outputfiles o WHERE o.category = '10x Loupe File' -GROUP BY o.readset +GROUP BY o.readset, o.category HAVING COUNT(*) > 1 \ No newline at end of file From 9224d021b74cc628fbd9eed0cd78b993d0ee7063 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 6 Dec 2024 20:45:38 -0800 Subject: [PATCH 49/53] Remove scMetabolism --- singlecell/resources/chunks/RunScMetabolism.R | 14 ----- .../labkey/singlecell/SingleCellModule.java | 2 - .../pipeline/singlecell/RunScMetabolism.java | 53 ------------------- 3 files changed, 69 deletions(-) delete mode 100644 singlecell/resources/chunks/RunScMetabolism.R delete mode 100644 singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunScMetabolism.java diff --git a/singlecell/resources/chunks/RunScMetabolism.R b/singlecell/resources/chunks/RunScMetabolism.R deleted file mode 100644 index 3eabe5611..000000000 --- a/singlecell/resources/chunks/RunScMetabolism.R +++ /dev/null @@ -1,14 +0,0 @@ -for 
(datasetId in names(seuratObjects)) { - printName(datasetId) - seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) - - for (metabolismType in metabolismTypes) { - seuratObj <- CellMembrane::RunScMetabolism(seuratObj, metabolismType = metabolismType) - } - - saveData(seuratObj, datasetId) - - # Cleanup - rm(seuratObj) - gc() -} \ No newline at end of file diff --git a/singlecell/src/org/labkey/singlecell/SingleCellModule.java b/singlecell/src/org/labkey/singlecell/SingleCellModule.java index 7749acfd1..922c93ee9 100644 --- a/singlecell/src/org/labkey/singlecell/SingleCellModule.java +++ b/singlecell/src/org/labkey/singlecell/SingleCellModule.java @@ -90,7 +90,6 @@ import org.labkey.singlecell.pipeline.singlecell.RunSDA; import org.labkey.singlecell.pipeline.singlecell.RunScGate; import org.labkey.singlecell.pipeline.singlecell.RunScGateBuiltin; -import org.labkey.singlecell.pipeline.singlecell.RunScMetabolism; import org.labkey.singlecell.pipeline.singlecell.RunSingleR; import org.labkey.singlecell.pipeline.singlecell.RunVision; import org.labkey.singlecell.pipeline.singlecell.ScoreCellCycle; @@ -284,7 +283,6 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new RunLDA.Provider()); SequencePipelineService.get().registerPipelineStep(new FilterDisallowedClasses.Provider()); SequencePipelineService.get().registerPipelineStep(new SummarizeTCellActivation.Provider()); - SequencePipelineService.get().registerPipelineStep(new RunScMetabolism.Provider()); SequencePipelineService.get().registerPipelineStep(new ScoreCellCycle.Provider()); SequencePipelineService.get().registerPipelineStep(new TrainScTour.Provider()); SequencePipelineService.get().registerPipelineStep(new PredictScTour.Provider()); diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunScMetabolism.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunScMetabolism.java deleted file mode 100644 index 80e0fc4a3..000000000 --- 
a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunScMetabolism.java +++ /dev/null @@ -1,53 +0,0 @@ -package org.labkey.singlecell.pipeline.singlecell; - -import org.json.JSONObject; -import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider; -import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; -import org.labkey.api.singlecell.pipeline.SeuratToolParameter; -import org.labkey.api.singlecell.pipeline.SingleCellStep; - -import java.util.List; - -public class RunScMetabolism extends AbstractCellMembraneStep -{ - public RunScMetabolism(PipelineContext ctx, RunScMetabolism.Provider provider) - { - super(provider, ctx); - } - - public static class Provider extends AbstractPipelineStepProvider - { - public Provider() - { - super("RunScMetabolism", "scMetabolism", "scMetabolism", "This will run scMetabolism to score enrichment of metabolic pathways.", List.of( - SeuratToolParameter.create("metabolismTypes", "Metabolism Type(s)", "The databases to use", "ldk-simplecombo", new JSONObject() - {{ - put("multiSelect", true); - put("allowBlank", false); - put("storeValues", "KEGG;REACTOME"); - put("initialValues", "KEGG;REACTOME"); - put("delimiter", ";"); - put("joinReturnValue", true); - }}, "KEGG;REACTOME", null, true, true).delimiter(";") - ), null, null); - } - - @Override - public RunScMetabolism create(PipelineContext ctx) - { - return new RunScMetabolism(ctx, this); - } - } - - @Override - public boolean createsSeuratObjects() - { - return true; - } - - @Override - public String getFileSuffix() - { - return "scMetabolism"; - } -} From 121a5f8959b6f53b65960c7ad42ced7a7335355c Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 7 Dec 2024 11:41:37 -0800 Subject: [PATCH 50/53] Simplify merge VCF code --- .../pipeline/VariantProcessingStep.java | 2 +- .../pipeline/OrphanFilePipelineJob.java | 23 +---------------- .../pipeline/ProcessVariantsHandler.java | 4 +-- .../VariantProcessingRemoteMergeTask.java | 25 +++++++++---------- 
.../run/variant/SplitVcfBySamplesStep.java | 4 +-- 5 files changed, 18 insertions(+), 40 deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java index 09e2c5786..4b77e5dd7 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java @@ -97,7 +97,7 @@ default void validateScatter(ScatterGatherMethod method, PipelineJob job) throws } - default void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, TaskFileManager manager, ReferenceGenome genome, List orderedScatterOutputs, List orderedJobDirs) throws PipelineJobException + default void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, ReferenceGenome genome, List orderedScatterOutputs, List orderedJobDirs) throws PipelineJobException { ctx.getLogger().debug("No additional merge tasks are implemented for: " + getClass().getName()); } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java index 3d339b43a..9ffea44dc 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java @@ -200,27 +200,8 @@ public boolean isJobComplete(PipelineJob job) if (!orphanJobs.isEmpty()) { - getJob().getLogger().info("## The following sequence jobs are not referenced by readsets, analyses or output files."); + getJob().getLogger().info("## There are {} sequence jobs are not referenced by readsets, analyses or output files.", orphanJobs.size()); getJob().getLogger().info("## The best action would be to view the pipeline job 
list, 'Sequence Jobs' view, and filter for jobs without sequence outputs. Deleting any unwanted jobs through the UI should also delete files."); - for (PipelineStatusFile sf : orphanJobs) - { - File f = new File(sf.getFilePath()).getParentFile(); - if (f.exists()) - { - long size = FileUtils.sizeOfDirectory(f); - //ignore if less than 1mb - if (size > 1e6) - { - getJob().getLogger().info("\n## size: " + FileUtils.byteCountToDisplaySize(size)); - getJob().getLogger().info("\n" + f.getPath()); - } - } - else - { - messages.add("## Pipeline job folder does not exist: " + sf.getRowId()); - messages.add(f.getPath()); - } - } } if (!messages.isEmpty()) @@ -388,8 +369,6 @@ public void getOrphanFilesForContainer(Container c, User u, Set orphanFile { if (!knownSequenceJobPaths.contains(subdir)) { - messages.add("#pipeline path listed as orphan, and not present in known job paths: "); - messages.add(subdir.getPath()); probableDeletes.add(subdir); unexpectedPipelineDirs.add(subdir); } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java index 884711d43..c29fd8e05 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java @@ -898,7 +898,7 @@ else if (AbstractGenomicsDBImportHandler.TILE_DB_FILETYPE.isType(input)) } @Override - public void performAdditionalMergeTasks(JobContext ctx, PipelineJob job, TaskFileManager manager, ReferenceGenome genome, List orderedScatterOutputs, List orderedJobDirs) throws PipelineJobException + public void performAdditionalMergeTasks(JobContext ctx, PipelineJob job, ReferenceGenome genome, List orderedScatterOutputs, List orderedJobDirs) throws PipelineJobException { List> providers = SequencePipelineService.get().getSteps(job, VariantProcessingStep.class); for (PipelineStepCtx stepCtx 
: providers) @@ -906,7 +906,7 @@ public void performAdditionalMergeTasks(JobContext ctx, PipelineJob job, TaskFil VariantProcessingStep vps = stepCtx.getProvider().create(ctx); if (vps instanceof VariantProcessingStep.SupportsScatterGather ssg) { - ssg.performAdditionalMergeTasks(ctx, job, manager, genome, orderedScatterOutputs, orderedJobDirs); + ssg.performAdditionalMergeTasks(ctx, job, genome, orderedScatterOutputs, orderedJobDirs); } } } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/VariantProcessingRemoteMergeTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/VariantProcessingRemoteMergeTask.java index 5eeb8e50e..e7a9be1d7 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/VariantProcessingRemoteMergeTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/VariantProcessingRemoteMergeTask.java @@ -110,18 +110,17 @@ private VariantProcessingJob getPipelineJob() { SequenceTaskHelper.logModuleVersions(getJob().getLogger()); RecordedAction action = new RecordedAction(ACTION_NAME); - TaskFileManagerImpl manager = new TaskFileManagerImpl(getPipelineJob(), _wd.getDir(), _wd); JobContextImpl ctx = new JobContextImpl(getPipelineJob(), getPipelineJob().getSequenceSupport(), getPipelineJob().getParameterJson(), _wd.getDir(), new TaskFileManagerImpl(getPipelineJob(), _wd.getDir(), _wd), _wd); File finalOut; SequenceOutputHandler handler = getPipelineJob().getHandler(); if (handler instanceof SequenceOutputHandler.HasCustomVariantMerge) { - finalOut = ((SequenceOutputHandler.HasCustomVariantMerge)handler).performVariantMerge(manager, action, handler, getJob()); + finalOut = ((SequenceOutputHandler.HasCustomVariantMerge)handler).performVariantMerge(ctx.getFileManager(), action, handler, getJob()); } else { - finalOut = runDefaultVariantMerge(ctx, manager, action, handler); + finalOut = runDefaultVariantMerge(ctx, action, handler); } Map scatterOutputs = getPipelineJob().getScatterJobOutputs(); 
@@ -136,7 +135,7 @@ private VariantProcessingJob getPipelineJob() if (finalOut != null) { SequenceOutputFile finalOutput = ((SequenceOutputHandler.TracksVCF) getPipelineJob().getHandler()).createFinalSequenceOutput(getJob(), finalOut, getPipelineJob().getFiles()); - manager.addSequenceOutput(finalOutput); + ctx.getFileManager().addSequenceOutput(finalOutput); } } else @@ -147,16 +146,16 @@ private VariantProcessingJob getPipelineJob() File cacheDir = getPipelineJob().getLocationForCachedInputs(_wd, false); if (cacheDir.exists()) { - manager.addIntermediateFile(cacheDir); + ctx.getFileManager().addIntermediateFile(cacheDir); } - manager.deleteIntermediateFiles(); - manager.cleanup(Collections.singleton(action)); + ctx.getFileManager().deleteIntermediateFiles(); + ctx.getFileManager().cleanup(Collections.singleton(action)); return new RecordedActionSet(action); } - private @Nullable File runDefaultVariantMerge(JobContextImpl ctx, TaskFileManagerImpl manager, RecordedAction action, SequenceOutputHandler handler) throws PipelineJobException + private @Nullable File runDefaultVariantMerge(JobContextImpl ctx, RecordedAction action, SequenceOutputHandler handler) throws PipelineJobException { Map> jobToIntervalMap = getPipelineJob().getJobToIntervalMap(); getJob().setStatus(PipelineJob.TaskStatus.running, "Combining Per-Contig VCFs: " + jobToIntervalMap.size()); @@ -186,9 +185,9 @@ else if (!vcf.exists()) toConcat.add(vcf); - manager.addInput(action, "Input VCF", vcf); - manager.addIntermediateFile(vcf); - manager.addIntermediateFile(new File(vcf.getPath() + ".tbi")); + ctx.getFileManager().addInput(action, "Input VCF", vcf); + ctx.getFileManager().addIntermediateFile(vcf); + ctx.getFileManager().addIntermediateFile(new File(vcf.getPath() + ".tbi")); } if (totalNull > 0 && !toConcat.isEmpty()) @@ -225,13 +224,13 @@ else if (!vcf.exists()) boolean sortAfterMerge = getPipelineJob().scatterMethodRequiresSort() || handler instanceof VariantProcessingStep.SupportsScatterGather 
&& ((VariantProcessingStep.SupportsScatterGather) handler).doSortAfterMerge(); combined = SequenceAnalysisService.get().combineVcfs(toConcat, combined, genome, getJob().getLogger(), true, null, sortAfterMerge); } - manager.addOutput(action, "Merged VCF", combined); + ctx.getFileManager().addOutput(action, "Merged VCF", combined); } if (handler instanceof VariantProcessingStep.SupportsScatterGather) { ctx.getLogger().debug("Running additional merge tasks"); - ((VariantProcessingStep.SupportsScatterGather) handler).performAdditionalMergeTasks(ctx, getPipelineJob(), manager, genome, toConcat, new ArrayList<>(jobToIntervalMap.keySet())); + ((VariantProcessingStep.SupportsScatterGather) handler).performAdditionalMergeTasks(ctx, getPipelineJob(), genome, toConcat, new ArrayList<>(jobToIntervalMap.keySet())); } return combined; diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SplitVcfBySamplesStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SplitVcfBySamplesStep.java index 7191b8d8b..20c076c54 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SplitVcfBySamplesStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SplitVcfBySamplesStep.java @@ -90,7 +90,7 @@ private List findProducedVcfs(File inputVCF, File outputDirectory) } @Override - public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, TaskFileManager manager, ReferenceGenome genome, List orderedScatterOutputs, List orderedJobDirs) throws PipelineJobException + public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, ReferenceGenome genome, List orderedScatterOutputs, List orderedJobDirs) throws PipelineJobException { job.getLogger().info("Merging additional track VCFs"); File inputVCF = ((SequenceJob)getPipelineCtx().getJob()).getInputFiles().get(0); @@ -133,7 +133,7 @@ public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, 
Pi so.setFile(combined); so.setCategory("VCF File"); so.setLibrary_id(genome.getGenomeId()); - manager.addSequenceOutput(so); + ctx.getFileManager().addSequenceOutput(so); } } From 9a12e355105116335d02f7b266c0d0af951c6b0d Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 11 Dec 2024 10:55:25 -0800 Subject: [PATCH 51/53] Allow KING to exclude contigs --- .../run/variant/KingInferenceStep.java | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java index aa7d6da7b..62c4a3c0e 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java @@ -13,6 +13,7 @@ import org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.sequenceanalysis.SequenceAnalysisService; import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.CommandLineParam; import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider; import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; @@ -43,8 +44,11 @@ public Provider() ToolParameterDescriptor.create("limitToChromosomes", "Limit to Chromosomes", "If checked, the analysis will include only the primary chromosomes", "checkbox", new JSONObject() {{ put("checked", true); - }}, true) - ), null, "https://www.kingrelatedness.com/manual.shtml"); + }}, true), + ToolParameterDescriptor.create("excludedContigs", "Excluded Contigs", "A comma separated list of contigs to exclude, such as X,Y,MT.", "textfield", new JSONObject(){{ + + }}, "X,Y,MT") + ), null, "https://www.kingrelatedness.com/manual.shtml"); } @Override @@ -90,9 +94,9 @@ public Output processVariants(File inputVCF, 
File outputDirectory, ReferenceGeno return NumberUtils.isCreatable(name) || "X".equalsIgnoreCase(name) || "Y".equalsIgnoreCase(name); }).map(SAMSequenceRecord::getSequenceName).toList(); - if (toKeep.size() == 0) + if (toKeep.isEmpty()) { - getPipelineCtx().getLogger().info("The option to limit to chromosomes was selected, but no contigs were foudn with numeric names or names beginning with chr. All contigs will be used."); + getPipelineCtx().getLogger().info("The option to limit to chromosomes was selected, but no contigs were found with numeric names or names beginning with chr. All contigs will be used."); } else { @@ -101,6 +105,13 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno } } + String excludedContigs = StringUtils.trimToNull(getProvider().getParameterByName("excludedContigs").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class)); + if (excludedContigs != null) + { + plinkArgs.add("--not-chr"); + plinkArgs.add(excludedContigs); + } + plinkArgs.add("--allow-extra-chr"); plinkArgs.add("--silent"); From db9dc602ada41c7710dee87056e88b5f63d063f1 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 12 Dec 2024 10:48:16 -0800 Subject: [PATCH 52/53] Auto-expand JBrowse browser --- jbrowse/src/client/JBrowse/Browser/Browser.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jbrowse/src/client/JBrowse/Browser/Browser.tsx b/jbrowse/src/client/JBrowse/Browser/Browser.tsx index 90b7033ec..58ca9f155 100644 --- a/jbrowse/src/client/JBrowse/Browser/Browser.tsx +++ b/jbrowse/src/client/JBrowse/Browser/Browser.tsx @@ -50,7 +50,7 @@ function View(){ return ( //TODO: can we make this expand to full page height? -
+
From d9a3e04c754f5d19333b5e2a1d35b9c209a2f820 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 12 Dec 2024 10:49:54 -0800 Subject: [PATCH 53/53] Add calculated column showing unique genomes used by readset --- .../query/SequenceAnalysisUserSchema.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/query/SequenceAnalysisUserSchema.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/query/SequenceAnalysisUserSchema.java index 858cbcc92..28aabfd54 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/query/SequenceAnalysisUserSchema.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/query/SequenceAnalysisUserSchema.java @@ -369,6 +369,18 @@ public void renderGridCellContents(RenderContext ctx, Writer out) throws IOExcep ret.addColumn(newCol); } + if (ret.getColumn("distinctOutputGenomes") == null) + { + String chr = ret.getSqlDialect().isPostgreSQL() ? "chr" : "char"; + SQLFragment sql = new SQLFragment("(SELECT ").append(ret.getSqlDialect().getGroupConcat(new SQLFragment("l.name"), true, true, new SQLFragment(chr + "(10)"))).append(new SQLFragment(" as expr FROM " + SequenceAnalysisSchema.SCHEMA_NAME + "." + SequenceAnalysisSchema.TABLE_OUTPUTFILES + " a JOIN " + SequenceAnalysisSchema.SCHEMA_NAME + "." + SequenceAnalysisSchema.TABLE_REF_LIBRARIES + " l ON (a.library_id = l.rowid) WHERE a.readset = " + ExprColumn.STR_TABLE_ALIAS + ".rowid)")); + ExprColumn newCol = new ExprColumn(ret, "distinctOutputGenomes", sql, JdbcType.VARCHAR, sourceTable.getColumn("rowid")); + newCol.setLabel("Output File Genomes For Readset"); + newCol.setWidth("200"); + newCol.setURL(DetailsURL.fromString("/query/executeQuery.view?schemaName=sequenceanalysis&query.queryName=outputfiles&query.readset~eq=${rowid}&query.library_id~isnonblank", ret.getContainer().isWorkbook() ? 
ret.getContainer().getParent() : ret.getContainer())); + + ret.addColumn(newCol); + } + if (ret.getColumn("totalForwardReads") == null) { SQLFragment sql = new SQLFragment("(SELECT SUM(q.metricvalue) as expr FROM " + SequenceAnalysisSchema.SCHEMA_NAME + "." + SequenceAnalysisSchema.TABLE_READ_DATA + " rd " +