diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/JobResourceSettings.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/JobResourceSettings.java index 2f7a9f122..a83b2aca3 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/JobResourceSettings.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/JobResourceSettings.java @@ -1,7 +1,9 @@ package org.labkey.api.sequenceanalysis.pipeline; +import org.jetbrains.annotations.Nullable; import org.labkey.api.data.Container; +import java.io.File; import java.util.Collection; import java.util.List; @@ -15,4 +17,9 @@ public interface JobResourceSettings List getParams(); Collection getDockerVolumes(Container c); + + default @Nullable File inferDockerVolume(File input) + { + return null; + } } diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java index 64097cd84..dd19bba68 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java @@ -102,6 +102,12 @@ static public void setInstance(SequencePipelineService instance) abstract public Collection getDockerVolumes(Container c); + /** + * The purpose of this method is to assist with translating from raw filepath to the desired volume to mount in a docker container. + * This is mostly relevant for situations where the NFS root should be mounted, rather than a child folder. 
+ */ + abstract public @Nullable File inferDockerVolume(File input); + abstract public List getSequenceJobInputFiles(PipelineJob job); /** diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java index 09e2c5786..4b77e5dd7 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java @@ -97,7 +97,7 @@ default void validateScatter(ScatterGatherMethod method, PipelineJob job) throws } - default void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, TaskFileManager manager, ReferenceGenome genome, List orderedScatterOutputs, List orderedJobDirs) throws PipelineJobException + default void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, ReferenceGenome genome, List orderedScatterOutputs, List orderedJobDirs) throws PipelineJobException { ctx.getLogger().debug("No additional merge tasks are implemented for: " + getClass().getName()); } diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index 12b2158fa..a91cea5c7 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -1,8 +1,8 @@ package org.labkey.api.sequenceanalysis.run; -import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Logger; +import org.jetbrains.annotations.Nullable; import org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker; 
@@ -13,13 +13,24 @@ import java.io.IOException; import java.io.PrintWriter; import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; public class DockerWrapper extends AbstractCommandWrapper { private final String _containerName; private final PipelineContext _ctx; private File _tmpDir = null; + private String _entryPoint = null; + private boolean _runPrune = true; + private String _alternateUserHome = null; + private final Map _dockerEnvironment = new HashMap<>(); public DockerWrapper(String containerName, Logger log, PipelineContext ctx) { @@ -28,12 +39,32 @@ public DockerWrapper(String containerName, Logger log, PipelineContext ctx) _ctx = ctx; } + public void setAlternateUserHome(String alternateUserHome) + { + _alternateUserHome = alternateUserHome; + } + public void setTmpDir(File tmpDir) { _tmpDir = tmpDir; } + public void setEntryPoint(String entryPoint) + { + _entryPoint = entryPoint; + } + + public void setRunPrune(boolean runPrune) + { + _runPrune = runPrune; + } + public void executeWithDocker(List containerArgs, File workDir, PipelineOutputTracker tracker) throws PipelineJobException + { + executeWithDocker(containerArgs, workDir, tracker, null); + } + + public void executeWithDocker(List containerArgs, File workDir, PipelineOutputTracker tracker, @Nullable Collection inputFiles) throws PipelineJobException { File localBashScript = new File(workDir, "docker.sh"); File dockerBashScript = new File(workDir, "dockerRun.sh"); @@ -45,70 +76,131 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline { writer.println("#!/bin/bash"); writer.println("set -x"); - writer.println("WD=`pwd`"); - writer.println("HOME=`echo ~/`"); + writer.println("set -e"); + writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - 
writer.println("sudo $DOCKER pull " + _containerName); - writer.println("sudo $DOCKER run --rm=true \\"); - writer.println("\t-v \"${WD}:/work\" \\"); - writer.println("\t-v \"${HOME}:/homeDir\" \\"); - _ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); + writer.println("$DOCKER pull " + _containerName); + if (_runPrune) + { + writer.println("$DOCKER image prune -f"); + } + + writer.println("$DOCKER run --rm=true \\"); + writer.println("\t--group-add keep-groups \\"); + + // NOTE: getDockerVolumes() should be refactored to remove the -v and this logic should be updated accordingly: + File homeDir = new File(System.getProperty("user.home")); + if (homeDir.exists()) + { + if (_ctx.getDockerVolumes().stream().noneMatch(homeDir.getPath()::startsWith)) + { + writer.println("\t-v '" + homeDir.getPath() + "':'" + homeDir.getPath() + "' \\"); + } + else + { + _ctx.getLogger().debug("homeDir already present in docker volumes, will not re-add"); + } + + _dockerEnvironment.put("USER_HOME", homeDir.getPath()); + } + + if (_alternateUserHome != null) + { + _dockerEnvironment.put("HOME", _alternateUserHome); + } + + _ctx.getDockerVolumes().forEach(v -> writer.println("\t-v '" + v + "':'" + v + "' \\")); + if (inputFiles != null) + { + inspectInputFiles(inputFiles).forEach(v -> writer.println("\t-v '" + v + "':'" + v + "' \\")); + } + if (_tmpDir != null) { - writer.println("\t-v \"" + _tmpDir.getPath() + ":/tmp\" \\"); + // NOTE: getDockerVolumes() should be refactored to remove the -v and this logic should be updated accordingly: + if (_ctx.getDockerVolumes().stream().noneMatch(_tmpDir.getPath()::startsWith)) + { + writer.println("\t-v '" + _tmpDir.getPath() + "':/tmp \\"); + } + else + { + _ctx.getLogger().debug("tmpDir already present in docker volumes, omitting"); + } + + addToDockerEnvironment("TMPDIR", _tmpDir.getPath()); + } + + if (_entryPoint != null) + { + writer.println("\t--entrypoint \"" + _entryPoint + "\"\\"); } - writer.println("\t--entrypoint 
/bin/bash \\"); - writer.println("\t-w /work \\"); + + writer.println("\t-w " + workDir.getPath() + " \\"); + addToDockerEnvironment("WORK_DIR", workDir.getPath()); + Integer maxRam = SequencePipelineService.get().getMaxRam(); if (maxRam != null) { writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM=" + maxRam + " \\"); writer.println("\t--memory='" + maxRam + "g' \\"); } + + for (String key : _dockerEnvironment.keySet()) + { + writer.println("\t-e " + key + "='" + _dockerEnvironment.get(key) + "' \\"); + } writer.println("\t" + _containerName + " \\"); - writer.println("\t/work/" + dockerBashScript.getName()); - writer.println("EXIT_CODE=$?"); - writer.println("echo 'Docker run exit code: '$EXIT_CODE"); - writer.println("exit $EXIT_CODE"); + writer.println("\t" + dockerBashScript.getPath()); + writer.println("DOCKER_EXIT_CODE=$?"); + writer.println("echo 'Docker run exit code: '$DOCKER_EXIT_CODE"); + writer.println("exit $DOCKER_EXIT_CODE"); dockerWriter.println("#!/bin/bash"); dockerWriter.println("set -x"); dockerWriter.println(StringUtils.join(containerArgs, " ")); - dockerWriter.println("EXIT_CODE=$?"); - dockerWriter.println("echo 'Exit code: '$?"); - dockerWriter.println("exit $EXIT_CODE"); + dockerWriter.println("BASH_EXIT_CODE=$?"); + dockerWriter.println("echo 'Bash exit code: '$BASH_EXIT_CODE"); + dockerWriter.println("exit $BASH_EXIT_CODE"); } catch (IOException e) { throw new PipelineJobException(e); } + localBashScript.setExecutable(true); + dockerBashScript.setExecutable(true); execute(Arrays.asList("/bin/bash", localBashScript.getPath())); } - public File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTracker output) throws PipelineJobException + public void addToDockerEnvironment(String key, String value) { - try + _dockerEnvironment.put(key, value); + } + + private Collection inspectInputFiles(Collection inputFiles) + { + Set toAdd = inputFiles.stream().map(f -> f.isDirectory() ? 
f : f.getParentFile()).filter(x -> _ctx.getDockerVolumes().stream().noneMatch(x.getPath()::startsWith)).collect(Collectors.toSet()); + if (!toAdd.isEmpty()) { - if (workingDirectory.equals(input.getParentFile())) - { - return input; - } + Set paths = new HashSet<>(); + toAdd.forEach(x -> { + _ctx.getLogger().debug("Adding volume for path: " + x.getPath()); - File local = new File(workingDirectory, input.getName()); - if (!local.exists()) - { - getLogger().debug("Copying file locally: " + input.getPath()); - FileUtils.copyFile(input, local); - } + File converted = SequencePipelineService.get().inferDockerVolume(x); + if (!x.equals(converted)) + { + _ctx.getLogger().debug("added as: " + converted.getPath()); + } - output.addIntermediateFile(local); + if (_ctx.getDockerVolumes().stream().noneMatch(converted.getPath()::startsWith)) + { + paths.add(converted); + } + }); - return local; - } - catch (IOException e) - { - throw new PipelineJobException(e); + return paths; } + + return Collections.emptySet(); } } diff --git a/SequenceAnalysis/build.gradle b/SequenceAnalysis/build.gradle index e743cb653..b6e656670 100644 --- a/SequenceAnalysis/build.gradle +++ b/SequenceAnalysis/build.gradle @@ -194,20 +194,4 @@ if (project.findProject(BuildUtils.getTestProjectPath(project.gradle)) != null & << "\ncontext.pipelineConfig=${configDir.getAbsolutePath().replace("\\", "\\\\")}" } } -} - -project.tasks.register("copyJars", Copy) - { CopySpec copy -> - copy.group = "Build" - copy.description = "Copy commons-math3 JAR to module's lib directory" - - copy.setDuplicatesStrategy(DuplicatesStrategy.EXCLUDE) - copy.from(project.configurations.external) - copy.into new File("${project.labkey.explodedModuleLibDir}") - copy.include { - "**commons-math3-**.jar" - } - } - -project.tasks.named('module').configure { dependsOn(project.tasks.copyJars) } -project.tasks.named('copyJars').configure { mustRunAfter(project.tasks.populateExplodedLib) } +} \ No newline at end of file diff --git 
a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index a04deedb8..33686252b 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -188,9 +188,12 @@ then unzip paragraph-v2.4a-binary.zip rm paragraph-v2.4a-binary.zip + python3 -m pip install pysam intervaltree + cd ../ cp -R paragraph $LKTOOLS_DIR ln -s ${LKTOOLS_DIR}/paragraph/bin/paragraph ${LKTOOLS_DIR}/paragraph + ln -s ${LKTOOLS_DIR}/paragraph/bin/idxdepth ${LKTOOLS_DIR}/idxdepth ln -s ${LKTOOLS_DIR}/paragraph/bin/multigrmpy.py ${LKTOOLS_DIR}/multigrmpy.py else echo "Already installed" @@ -215,3 +218,14 @@ then else echo "Already installed" fi + +if [[ ! -e ${LKTOOLS_DIR}/multiqc || ! -z $FORCE_REINSTALL ]]; +then + echo "Cleaning up previous installs" + rm -Rf multiqc* + rm -Rf $LKTOOLS_DIR/multiqc* + + python3 -m pip install --user multiqc +else + echo "Already installed" +fi \ No newline at end of file diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index 338bf6939..61c9a0fe1 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -104,6 +104,7 @@ import org.labkey.sequenceanalysis.run.alignment.StarWrapper; import org.labkey.sequenceanalysis.run.alignment.VulcanWrapper; import org.labkey.sequenceanalysis.run.analysis.BamIterator; +import org.labkey.sequenceanalysis.run.analysis.BcftoolsFillFromFastaStep; import org.labkey.sequenceanalysis.run.analysis.BcftoolsFillTagsStep; import org.labkey.sequenceanalysis.run.analysis.BcftoolsFixploidyStep; import org.labkey.sequenceanalysis.run.analysis.DeepVariantAnalysis; @@ -365,6 +366,7 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new 
SummarizeGenotypeQualityStep.Provider()); SequencePipelineService.get().registerPipelineStep(new BcftoolsFillTagsStep.Provider()); SequencePipelineService.get().registerPipelineStep(new BcftoolsFixploidyStep.Provider()); + SequencePipelineService.get().registerPipelineStep(new BcftoolsFillFromFastaStep.Provider()); SequencePipelineService.get().registerPipelineStep(new SVAnnotateStep.Provider()); //handlers diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java index 9716cac61..bfaaaba78 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java @@ -472,16 +472,28 @@ public Collection getDockerVolumes(Container c) { if (settings.isAvailable(c)) { - for (String volume : settings.getDockerVolumes(c)) - { - volumeLines.add("-v '" + volume + "':'" + volume + "'"); - } + return Collections.unmodifiableCollection(settings.getDockerVolumes(c)); } } return volumeLines; } + @Override + public @Nullable File inferDockerVolume(File input) + { + for (JobResourceSettings settings : SequencePipelineServiceImpl.get().getResourceSettings()) + { + File ret = settings.inferDockerVolume(input); + if (ret != null) + { + return ret; + } + } + + return input; + } + @Override public List getSequenceJobInputFiles(PipelineJob job) { @@ -570,7 +582,7 @@ public void registerResourceSettings(JobResourceSettings settings) @Override public Set getResourceSettings() { - return _resourceSettings; + return Collections.unmodifiableSet(_resourceSettings); } @Override diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java index 61ce01cd2..dea268e15 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java +++ 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java @@ -23,6 +23,7 @@ import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; +import org.labkey.api.sequenceanalysis.run.DockerWrapper; import org.labkey.api.util.FileType; import org.labkey.api.writer.PrintWriters; import org.labkey.sequenceanalysis.SequenceAnalysisModule; @@ -235,120 +236,68 @@ public GLNexusWrapper(Logger logger) super(logger); } - private File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTracker output) throws PipelineJobException + public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker tracker, String binVersion, String configType, SAMSequenceRecord rec, JobContext ctx) throws PipelineJobException { - try - { - if (workingDirectory.equals(input.getParentFile())) - { - return input; - } + DockerWrapper wrapper = new DockerWrapper("ghcr.io/dnanexus-rnd/glnexus:" + binVersion, ctx.getLogger(), ctx); + wrapper.setTmpDir(new File(SequencePipelineService.get().getJavaTempDir())); + wrapper.setWorkingDir(ctx.getWorkingDirectory()); - File local = new File(workingDirectory, input.getName()); - if (!local.exists()) - { - getLogger().debug("Copying file locally: " + input.getPath()); - FileUtils.copyFile(input, local); - } - - output.addIntermediateFile(local); - - return local; + File bed = new File(ctx.getWorkingDirectory(), "contig.bed"); + tracker.addIntermediateFile(bed); + try (PrintWriter bedWriter = PrintWriters.getPrintWriter(bed)) + { + // Create a single-contig BED file: + bedWriter.println(rec.getSequenceName() + "\t0\t" + rec.getSequenceLength()); } catch (IOException e) { throw new PipelineJobException(e); } - } - public void execute(List inputGvcfs, File outputVcf, PipelineOutputTracker tracker, String binVersion, String configType, SAMSequenceRecord rec, JobContext 
ctx) throws PipelineJobException - { - File workDir = outputVcf.getParentFile(); - tracker.addIntermediateFile(outputVcf); - tracker.addIntermediateFile(new File(outputVcf.getPath() + ".tbi")); + List dockerArgs = new ArrayList<>(); + dockerArgs.add("glnexus_cli"); + dockerArgs.add("--config " + configType); - List gvcfsLocal = new ArrayList<>(); - for (File f : inputGvcfs) + Integer maxRam = SequencePipelineService.get().getMaxRam(); + if (maxRam != null) { - gvcfsLocal.add(ensureLocalCopy(f, workDir, tracker)); - ensureLocalCopy(new File(f.getPath() + ".tbi"), workDir, tracker); + dockerArgs.add("--mem-gbytes " + maxRam); } - File localBashScript = new File(workDir, "docker.sh"); - tracker.addIntermediateFile(localBashScript); - - File bed = new File(workDir, "contig.bed"); - tracker.addIntermediateFile(bed); + dockerArgs.add("--bed " + bed.getPath()); + dockerArgs.add("--trim-uncalled-alleles"); - try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript);PrintWriter bedWriter = PrintWriters.getPrintWriter(bed)) + Integer maxThreads = SequencePipelineService.get().getMaxThreads(getLogger()); + if (maxThreads != null) { - writer.println("#!/bin/bash"); - writer.println("set -x"); - writer.println("WD=`pwd`"); - writer.println("HOME=`echo ~/`"); - writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("sudo $DOCKER pull ghcr.io/dnanexus-rnd/glnexus:" + binVersion); - writer.println("sudo $DOCKER run --rm=true \\"); - writer.println("\t-v \"${WD}:/work\" \\"); - writer.println("\t-v \"${HOME}:/homeDir\" \\"); - ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); - writer.println("\t -w /work \\"); - if (!StringUtils.isEmpty(System.getenv("TMPDIR"))) - { - writer.println("\t-v \"${TMPDIR}:/tmp\" \\"); - } - writer.println("\t-u $UID \\"); - writer.println("\t-e USERID=$UID \\"); + dockerArgs.add("--threads " + maxThreads); + } - Integer maxRam = SequencePipelineService.get().getMaxRam(); - if 
(maxRam != null) - { - writer.println("\t--memory='" + maxRam + "g' \\"); - } - writer.println("\tghcr.io/dnanexus-rnd/glnexus:" + binVersion + " \\"); - writer.println("\tglnexus_cli \\"); - writer.println("\t--config " + configType + " \\"); - writer.println("\t--bed /work/" + bed.getName() + " \\"); - writer.println("\t--trim-uncalled-alleles \\"); + inputGvcfs.forEach(f -> { + dockerArgs.add(f.getPath()); + }); - if (maxRam != null) - { - writer.println("\t--mem-gbytes " + maxRam + "\\"); - } + File bcftools = BcftoolsRunner.getBcfToolsPath(); + File bgzip = BgzipRunner.getExe(); + dockerArgs.add(" | " + bcftools.getPath() + " view | " + bgzip.getPath() + " -c > " + outputVcf.getPath()); - Integer maxThreads = SequencePipelineService.get().getMaxThreads(getLogger()); - if (maxThreads != null) + // Command will fail if this exists: + File dbDir = new File (ctx.getWorkingDirectory(), "GLnexus.DB"); + tracker.addIntermediateFile(dbDir); + if (dbDir.exists()) + { + getLogger().debug("Deleting pre-existing GLnexus.DB dir"); + try { - writer.println("\t--threads " + maxThreads + " \\"); + FileUtils.deleteDirectory(dbDir); } - - gvcfsLocal.forEach(f -> { - writer.println("\t/work/" + f.getName() + " \\"); - }); - - File bcftools = BcftoolsRunner.getBcfToolsPath(); - File bgzip = BgzipRunner.getExe(); - writer.println("\t| " + bcftools.getPath() + " view | " + bgzip.getPath() + " -c > " + outputVcf.getPath()); - - // Command will fail if this exists: - File dbDir = new File (outputVcf.getParentFile(), "GLnexus.DB"); - tracker.addIntermediateFile(dbDir); - if (dbDir.exists()) + catch (IOException e) { - getLogger().debug("Deleting pre-existing GLnexus.DB dir"); - FileUtils.deleteDirectory(dbDir); + throw new PipelineJobException(e); } - - // Create a single-contig BED file: - bedWriter.println(rec.getSequenceName() + "\t0\t" + rec.getSequenceLength()); - } - catch (IOException e) - { - throw new PipelineJobException(e); } - setWorkingDir(workDir); - 
execute(Arrays.asList("/bin/bash", localBashScript.getPath())); + wrapper.executeWithDocker(dockerArgs, ctx.getWorkingDirectory(), tracker, inputGvcfs); if (!outputVcf.exists()) { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java index 3d339b43a..9ffea44dc 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java @@ -200,27 +200,8 @@ public boolean isJobComplete(PipelineJob job) if (!orphanJobs.isEmpty()) { - getJob().getLogger().info("## The following sequence jobs are not referenced by readsets, analyses or output files."); + getJob().getLogger().info("## There are {} sequence jobs are not referenced by readsets, analyses or output files.", orphanJobs.size()); getJob().getLogger().info("## The best action would be to view the pipeline job list, 'Sequence Jobs' view, and filter for jobs without sequence outputs. 
Deleting any unwanted jobs through the UI should also delete files."); - for (PipelineStatusFile sf : orphanJobs) - { - File f = new File(sf.getFilePath()).getParentFile(); - if (f.exists()) - { - long size = FileUtils.sizeOfDirectory(f); - //ignore if less than 1mb - if (size > 1e6) - { - getJob().getLogger().info("\n## size: " + FileUtils.byteCountToDisplaySize(size)); - getJob().getLogger().info("\n" + f.getPath()); - } - } - else - { - messages.add("## Pipeline job folder does not exist: " + sf.getRowId()); - messages.add(f.getPath()); - } - } } if (!messages.isEmpty()) @@ -388,8 +369,6 @@ public void getOrphanFilesForContainer(Container c, User u, Set orphanFile { if (!knownSequenceJobPaths.contains(subdir)) { - messages.add("#pipeline path listed as orphan, and not present in known job paths: "); - messages.add(subdir.getPath()); probableDeletes.add(subdir); unexpectedPipelineDirs.add(subdir); } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java index 884711d43..c29fd8e05 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java @@ -898,7 +898,7 @@ else if (AbstractGenomicsDBImportHandler.TILE_DB_FILETYPE.isType(input)) } @Override - public void performAdditionalMergeTasks(JobContext ctx, PipelineJob job, TaskFileManager manager, ReferenceGenome genome, List orderedScatterOutputs, List orderedJobDirs) throws PipelineJobException + public void performAdditionalMergeTasks(JobContext ctx, PipelineJob job, ReferenceGenome genome, List orderedScatterOutputs, List orderedJobDirs) throws PipelineJobException { List> providers = SequencePipelineService.get().getSteps(job, VariantProcessingStep.class); for (PipelineStepCtx stepCtx : providers) @@ -906,7 +906,7 @@ public void 
performAdditionalMergeTasks(JobContext ctx, PipelineJob job, TaskFil VariantProcessingStep vps = stepCtx.getProvider().create(ctx); if (vps instanceof VariantProcessingStep.SupportsScatterGather ssg) { - ssg.performAdditionalMergeTasks(ctx, job, manager, genome, orderedScatterOutputs, orderedJobDirs); + ssg.performAdditionalMergeTasks(ctx, job, genome, orderedScatterOutputs, orderedJobDirs); } } } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java index 9d1998016..80fddbc7f 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java @@ -57,8 +57,10 @@ import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; /** * Created by bimber on 8/31/2016. @@ -137,7 +139,7 @@ public SequenceJob(String providerName, Container c, User u, @Nullable String jo writeParameters(params); _folderFileRoot = c.isWorkbook() ? PipelineService.get().findPipelineRoot(c.getParent()) : pipeRoot; - _dockerVolumes = SequencePipelineService.get().getDockerVolumes(c); + _dockerVolumes = new HashSet<>(SequencePipelineService.get().getDockerVolumes(c)); setLogFile(_getLogFile()); writeSupportToDisk(); @@ -190,12 +192,26 @@ public void setFolderFileRoot(PipeRoot folderFileRoot) public Collection getDockerVolumes() { - return _dockerVolumes == null ? Collections.emptySet() : Collections.unmodifiableCollection(_dockerVolumes); + // TODO: this is for legacy jobs that included the -v arg. 
Eventually remove: + if (_dockerVolumes != null && _dockerVolumes.stream().anyMatch(x -> x.startsWith("-v"))) + { + _dockerVolumes = _dockerVolumes.stream().map(x -> { + if (x.startsWith("-v")) + { + x = x.split(":")[1]; + x = x.substring( 1, x.length() - 1); + } + + return x; + }).collect(Collectors.toSet()); + } + + return _dockerVolumes == null ? Collections.emptySet() : new HashSet<>(_dockerVolumes); } public void setDockerVolumes(Collection dockerVolumes) { - _dockerVolumes = dockerVolumes; + _dockerVolumes = dockerVolumes == null ? null : new HashSet<>(dockerVolumes); } public void setDescription(String description) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/VariantProcessingRemoteMergeTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/VariantProcessingRemoteMergeTask.java index 5eeb8e50e..e7a9be1d7 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/VariantProcessingRemoteMergeTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/VariantProcessingRemoteMergeTask.java @@ -110,18 +110,17 @@ private VariantProcessingJob getPipelineJob() { SequenceTaskHelper.logModuleVersions(getJob().getLogger()); RecordedAction action = new RecordedAction(ACTION_NAME); - TaskFileManagerImpl manager = new TaskFileManagerImpl(getPipelineJob(), _wd.getDir(), _wd); JobContextImpl ctx = new JobContextImpl(getPipelineJob(), getPipelineJob().getSequenceSupport(), getPipelineJob().getParameterJson(), _wd.getDir(), new TaskFileManagerImpl(getPipelineJob(), _wd.getDir(), _wd), _wd); File finalOut; SequenceOutputHandler handler = getPipelineJob().getHandler(); if (handler instanceof SequenceOutputHandler.HasCustomVariantMerge) { - finalOut = ((SequenceOutputHandler.HasCustomVariantMerge)handler).performVariantMerge(manager, action, handler, getJob()); + finalOut = ((SequenceOutputHandler.HasCustomVariantMerge)handler).performVariantMerge(ctx.getFileManager(), action, handler, getJob()); } else { - 
finalOut = runDefaultVariantMerge(ctx, manager, action, handler); + finalOut = runDefaultVariantMerge(ctx, action, handler); } Map scatterOutputs = getPipelineJob().getScatterJobOutputs(); @@ -136,7 +135,7 @@ private VariantProcessingJob getPipelineJob() if (finalOut != null) { SequenceOutputFile finalOutput = ((SequenceOutputHandler.TracksVCF) getPipelineJob().getHandler()).createFinalSequenceOutput(getJob(), finalOut, getPipelineJob().getFiles()); - manager.addSequenceOutput(finalOutput); + ctx.getFileManager().addSequenceOutput(finalOutput); } } else @@ -147,16 +146,16 @@ private VariantProcessingJob getPipelineJob() File cacheDir = getPipelineJob().getLocationForCachedInputs(_wd, false); if (cacheDir.exists()) { - manager.addIntermediateFile(cacheDir); + ctx.getFileManager().addIntermediateFile(cacheDir); } - manager.deleteIntermediateFiles(); - manager.cleanup(Collections.singleton(action)); + ctx.getFileManager().deleteIntermediateFiles(); + ctx.getFileManager().cleanup(Collections.singleton(action)); return new RecordedActionSet(action); } - private @Nullable File runDefaultVariantMerge(JobContextImpl ctx, TaskFileManagerImpl manager, RecordedAction action, SequenceOutputHandler handler) throws PipelineJobException + private @Nullable File runDefaultVariantMerge(JobContextImpl ctx, RecordedAction action, SequenceOutputHandler handler) throws PipelineJobException { Map> jobToIntervalMap = getPipelineJob().getJobToIntervalMap(); getJob().setStatus(PipelineJob.TaskStatus.running, "Combining Per-Contig VCFs: " + jobToIntervalMap.size()); @@ -186,9 +185,9 @@ else if (!vcf.exists()) toConcat.add(vcf); - manager.addInput(action, "Input VCF", vcf); - manager.addIntermediateFile(vcf); - manager.addIntermediateFile(new File(vcf.getPath() + ".tbi")); + ctx.getFileManager().addInput(action, "Input VCF", vcf); + ctx.getFileManager().addIntermediateFile(vcf); + ctx.getFileManager().addIntermediateFile(new File(vcf.getPath() + ".tbi")); } if (totalNull > 0 && 
!toConcat.isEmpty()) @@ -225,13 +224,13 @@ else if (!vcf.exists()) boolean sortAfterMerge = getPipelineJob().scatterMethodRequiresSort() || handler instanceof VariantProcessingStep.SupportsScatterGather && ((VariantProcessingStep.SupportsScatterGather) handler).doSortAfterMerge(); combined = SequenceAnalysisService.get().combineVcfs(toConcat, combined, genome, getJob().getLogger(), true, null, sortAfterMerge); } - manager.addOutput(action, "Merged VCF", combined); + ctx.getFileManager().addOutput(action, "Merged VCF", combined); } if (handler instanceof VariantProcessingStep.SupportsScatterGather) { ctx.getLogger().debug("Running additional merge tasks"); - ((VariantProcessingStep.SupportsScatterGather) handler).performAdditionalMergeTasks(ctx, getPipelineJob(), manager, genome, toConcat, new ArrayList<>(jobToIntervalMap.keySet())); + ((VariantProcessingStep.SupportsScatterGather) handler).performAdditionalMergeTasks(ctx, getPipelineJob(), genome, toConcat, new ArrayList<>(jobToIntervalMap.keySet())); } return combined; diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/query/SequenceAnalysisUserSchema.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/query/SequenceAnalysisUserSchema.java index 858cbcc92..28aabfd54 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/query/SequenceAnalysisUserSchema.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/query/SequenceAnalysisUserSchema.java @@ -369,6 +369,18 @@ public void renderGridCellContents(RenderContext ctx, Writer out) throws IOExcep ret.addColumn(newCol); } + if (ret.getColumn("distinctOutputGenomes") == null) + { + String chr = ret.getSqlDialect().isPostgreSQL() ? "chr" : "char"; + SQLFragment sql = new SQLFragment("(SELECT ").append(ret.getSqlDialect().getGroupConcat(new SQLFragment("l.name"), true, true, new SQLFragment(chr + "(10)"))).append(new SQLFragment(" as expr FROM " + SequenceAnalysisSchema.SCHEMA_NAME + "." 
+ SequenceAnalysisSchema.TABLE_OUTPUTFILES + " a JOIN " + SequenceAnalysisSchema.SCHEMA_NAME + "." + SequenceAnalysisSchema.TABLE_REF_LIBRARIES + " l ON (a.library_id = l.rowid) WHERE a.readset = " + ExprColumn.STR_TABLE_ALIAS + ".rowid)")); + ExprColumn newCol = new ExprColumn(ret, "distinctOutputGenomes", sql, JdbcType.VARCHAR, sourceTable.getColumn("rowid")); + newCol.setLabel("Output File Genomes For Readset"); + newCol.setWidth("200"); + newCol.setURL(DetailsURL.fromString("/query/executeQuery.view?schemaName=sequenceanalysis&query.queryName=outputfiles&query.readset~eq=${rowid}&query.library_id~isnonblank", ret.getContainer().isWorkbook() ? ret.getContainer().getParent() : ret.getContainer())); + + ret.addColumn(newCol); + } + if (ret.getColumn("totalForwardReads") == null) { SQLFragment sql = new SQLFragment("(SELECT SUM(q.metricvalue) as expr FROM " + SequenceAnalysisSchema.SCHEMA_NAME + "." + SequenceAnalysisSchema.TABLE_READ_DATA + " rd " + diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java index e57fdf757..d7dc80fd1 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java @@ -37,18 +37,12 @@ public BWAMem2Wrapper(@Nullable Logger logger) super(logger); } - public static class BWAMem2AlignmentStep extends BWAAlignmentStep + public static class BWAMem2AlignmentStep extends BWAMemAlignmentStep { public BWAMem2AlignmentStep(AlignmentStepProvider provider, PipelineContext ctx) { super(provider, ctx, new BWAMem2Wrapper(ctx.getLogger())); } - - @Override - public boolean doAddReadGroups() - { - return false; - } } public static class Provider extends AbstractAlignmentStepProvider diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java index 883b5b6fb..68599ecdd 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java @@ -38,9 +38,14 @@ public BWAMemWrapper(@Nullable Logger logger) public static class BWAMemAlignmentStep extends BWAAlignmentStep { + public BWAMemAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx, BWAMemWrapper wrapper) + { + super(provider, ctx, wrapper); + } + public BWAMemAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx) { - super(provider, ctx, new BWAMemWrapper(ctx.getLogger())); + this(provider, ctx, new BWAMemWrapper(ctx.getLogger())); } @Override diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index fa929a0ad..838f00f7e 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -44,11 +44,17 @@ public ParagraphStep() {{ put("allowBlank", false); }}, null), - ToolParameterDescriptor.create("doBndSubset", "Remove BNDs", "If the reference VCF contains BNDs, selecting this option will cause the job to remove them prior to paragraph", "checkbox", new JSONObject(){{ + ToolParameterDescriptor.create("doBndSubset", "Filter Input VCF", "If selected, prior to running SelectVariants will be run to remove BNDs sites with POS<150 and symbolic INS without ALT sequence", "checkbox", new JSONObject(){{ put("checked", false); }}, false), ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. 
This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{ put("checked", false); + }}, false), + ToolParameterDescriptor.create("verbose", "Verbose Logging", "If checked, --verbose will be passed to paragraph to increase logging", "checkbox", new JSONObject(){{ + put("checked", false); + }}, false), + ToolParameterDescriptor.create("retrieveReferenceSeq", "Retrieve Reference Sequence", "If checked, --retrieve-reference-sequence will be passed to paragraph", "checkbox", new JSONObject(){{ + put("checked", false); }}, false) )); } @@ -124,7 +130,7 @@ else if (!svVcf.exists()) SelectVariantsWrapper svw = new SelectVariantsWrapper(ctx.getLogger()); List selectArgs = new ArrayList<>(); selectArgs.add("-select"); - selectArgs.add("SVTYPE != 'BND' && POS > 150 && !(vc.hasAttribute('SVTYPE') && vc.getAttribute('SVTYPE') == 'INS' && vc.hasSymbolicAlleles() && !vc.hasAttribute('SEQ'))"); + selectArgs.add("SVTYPE != 'BND' && SVTYPE != 'DUP' && POS > 150 && !(vc.hasAttribute('SVTYPE') && vc.getAttribute('SVTYPE') == 'INS' && vc.hasSymbolicAlleles() && !vc.hasAttribute('SEQ'))"); selectArgs.add("--exclude-filtered"); selectArgs.add("--exclude-filtered"); selectArgs.add("--sites-only-vcf-output"); @@ -158,7 +164,24 @@ else if (!svVcf.exists()) depthArgs.add(threads.toString()); } - new SimpleScriptWrapper(ctx.getLogger()).execute(depthArgs); + File doneFile = new File(ctx.getWorkingDirectory(), "idxdepth.done"); + ctx.getFileManager().addIntermediateFile(doneFile); + if (doneFile.exists()) + { + ctx.getLogger().info("idxdepth already performed, skipping"); + } + else + { + new SimpleScriptWrapper(ctx.getLogger()).execute(depthArgs); + try + { + FileUtils.touch(doneFile); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } if (!coverageJson.exists()) { @@ -168,7 +191,7 @@ else if (!svVcf.exists()) // Should produce a simple text file: // id path
depth read length - // TNPRC-IB18 ../IB18.cram 29.77 150 + // IB18 ../IB18.cram 29.77 150 File coverageFile = new File(ctx.getWorkingDirectory(), "coverage.txt"); String rgId = null; try (PrintWriter writer = PrintWriters.getPrintWriter(coverageFile); SamReader reader = SamReaderFactory.makeDefault().open(so.getFile())) @@ -196,7 +219,7 @@ else if (!svVcf.exists()) } double readLength = json.getInt("read_length"); - writer.println(rgId + "\t" + "/work/" + so.getFile().getName() + "\t" + depth + "\t" + readLength); + writer.println(rgId + "\t" + so.getFile().getPath() + "\t" + depth + "\t" + readLength); } catch (IOException e) { @@ -205,33 +228,52 @@ else if (!svVcf.exists()) ctx.getFileManager().addIntermediateFile(coverageFile); DockerWrapper dockerWrapper = new DockerWrapper("ghcr.io/bimberlabinternal/paragraph:latest", ctx.getLogger(), ctx); + dockerWrapper.setTmpDir(new File(SequencePipelineService.get().getJavaTempDir())); + List paragraphArgs = new ArrayList<>(); paragraphArgs.add("/opt/paragraph/bin/multigrmpy.py"); - dockerWrapper.ensureLocalCopy(so.getFile(), ctx.getWorkingDirectory(), ctx.getFileManager()); - dockerWrapper.ensureLocalCopy(SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()), ctx.getWorkingDirectory(), ctx.getFileManager()); - File paragraphOutDir = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile())); paragraphArgs.add("-o"); - paragraphArgs.add("/work/" + paragraphOutDir.getName()); + paragraphArgs.add(paragraphOutDir.getPath()); + + File scratchDir = new File(ctx.getOutputDir(), "pgScratch"); + if (scratchDir.exists()) + { + try + { + FileUtils.deleteDirectory(scratchDir); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + paragraphArgs.add("--scratch-dir"); + paragraphArgs.add(scratchDir.getPath()); + + ctx.getFileManager().addIntermediateFile(scratchDir); paragraphArgs.add("-i"); - dockerWrapper.ensureLocalCopy(svVcf, ctx.getWorkingDirectory(), ctx.getFileManager()); - 
dockerWrapper.ensureLocalCopy(new File(svVcf.getPath() + ".tbi"), ctx.getWorkingDirectory(), ctx.getFileManager()); - paragraphArgs.add("/work/" + svVcf.getName()); + paragraphArgs.add(svVcf.getPath()); paragraphArgs.add("-m"); - paragraphArgs.add("/work/" + coverageFile.getName()); + paragraphArgs.add(coverageFile.getPath()); + + if (ctx.getParams().optBoolean("verbose", false)) + { + paragraphArgs.add("--verbose"); + } paragraphArgs.add("-r"); File genomeFasta = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile(); - dockerWrapper.ensureLocalCopy(genomeFasta, ctx.getWorkingDirectory(), ctx.getFileManager()); - dockerWrapper.ensureLocalCopy(new File(genomeFasta.getPath() + ".fai"), ctx.getWorkingDirectory(), ctx.getFileManager()); - paragraphArgs.add("/work/" + genomeFasta.getName()); + paragraphArgs.add(genomeFasta.getPath()); - paragraphArgs.add("--scratch-dir"); - paragraphArgs.add("/tmp"); - dockerWrapper.setTmpDir(new File(SequencePipelineService.get().getJavaTempDir())); + if (ctx.getParams().optBoolean("retrieveReferenceSeq", false)) + { + paragraphArgs.add("--retrieve-reference-sequence"); + } if (threads != null) { @@ -239,7 +281,7 @@ else if (!svVcf.exists()) paragraphArgs.add(threads.toString()); } - dockerWrapper.executeWithDocker(paragraphArgs, ctx.getWorkingDirectory(), ctx.getFileManager()); + dockerWrapper.executeWithDocker(paragraphArgs, ctx.getWorkingDirectory(), ctx.getFileManager(), Arrays.asList(so.getFile(), genomeFasta, svVcf)); File genotypes = new File(paragraphOutDir, "genotypes.vcf.gz"); if (!genotypes.exists()) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BcftoolsFillFromFastaStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BcftoolsFillFromFastaStep.java new file mode 100644 index 000000000..2068f56d6 --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/BcftoolsFillFromFastaStep.java @@ -0,0 +1,116 @@ +package 
org.labkey.sequenceanalysis.run.analysis; + +import htsjdk.samtools.util.Interval; +import org.apache.commons.lang3.StringUtils; +import org.jetbrains.annotations.Nullable; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.sequenceanalysis.SequenceAnalysisService; +import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.BcftoolsRunner; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep; +import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl; +import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; +import org.labkey.sequenceanalysis.pipeline.SequenceTaskHelper; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +public class BcftoolsFillFromFastaStep extends AbstractCommandPipelineStep implements VariantProcessingStep +{ + public BcftoolsFillFromFastaStep(PipelineStepProvider provider, PipelineContext ctx) + { + super(provider, ctx, new BcftoolsRunner(ctx.getLogger())); + } + + public static class Provider extends AbstractVariantProcessingStepProvider implements SupportsScatterGather + { + public Provider() + { + super("BcftoolsFillFromFastaStep", "Bcftools Fill-From-FASTA", "bcftools", "This will replace REF alleles listed as N with the sequence from the FASTA file.", Arrays.asList( + + ), null, null); + } + + @Override + public BcftoolsFillFromFastaStep create(PipelineContext ctx) + { + return new BcftoolsFillFromFastaStep(this, ctx); + } + } + + @Override + public Output processVariants(File inputVCF, File 
outputDirectory, ReferenceGenome genome, @Nullable List intervals) throws PipelineJobException + { + VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl(); + + List options = new ArrayList<>(); + options.add(BcftoolsRunner.getBcfToolsPath().getPath()); + options.add("+fill-from-fasta"); + + options.add(inputVCF.getPath()); + + if (intervals != null) + { + options.add("--regions"); + options.add(intervals.stream().map(interval -> interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd()).collect(Collectors.joining(","))); + } + + options.add("-O"); + options.add("z9"); + + Integer threads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger()); + if (threads != null) + { + options.add("--threads"); + options.add(threads.toString()); + } + + File outputVcf = new File(outputDirectory, SequenceTaskHelper.getUnzippedBaseName(inputVCF) + ".fill.vcf.gz"); + options.add("-o"); + options.add(outputVcf.getPath()); + + options.add("--"); + + options.add("-f"); + options.add(genome.getWorkingFastaFile().getPath()); + + options.add("-c"); + options.add("REF"); + + BcftoolsRunner wrapper = getWrapper(); + + String bcfPluginDir = StringUtils.trimToNull(System.getenv("BCFTOOLS_PLUGINS")); + if (bcfPluginDir != null) + { + getPipelineCtx().getLogger().debug("Setting BCFTOOLS_PLUGINS environment variable: " + bcfPluginDir); + wrapper.addToEnvironment("BCFTOOLS_PLUGINS", bcfPluginDir); + } + + wrapper.execute(options); + if (!outputVcf.exists()) + { + throw new PipelineJobException("output not found: " + outputVcf); + } + + try + { + SequenceAnalysisService.get().ensureVcfIndex(outputVcf, getWrapper().getLogger()); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + output.setVcf(outputVcf); + + return output; + } +} diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java index 0490ed063..3f8968831 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java @@ -1,7 +1,5 @@ package org.labkey.sequenceanalysis.run.analysis; -import org.apache.commons.io.FileUtils; -import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Logger; import org.json.JSONObject; import org.labkey.api.pipeline.PipelineJobException; @@ -20,13 +18,10 @@ import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; +import org.labkey.api.sequenceanalysis.run.DockerWrapper; import org.labkey.api.util.FileUtil; -import org.labkey.api.writer.PrintWriters; -import org.labkey.sequenceanalysis.util.SequenceUtil; import java.io.File; -import java.io.IOException; -import java.io.PrintWriter; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -194,32 +189,6 @@ public DeepVariantWrapper(Logger logger) super(logger); } - private File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTracker output) throws PipelineJobException - { - try - { - if (workingDirectory.equals(input.getParentFile())) - { - return input; - } - - File local = new File(workingDirectory, input.getName()); - if (!local.exists()) - { - getLogger().debug("Copying file locally: " + input.getPath()); - FileUtils.copyFile(input, local); - } - - output.addIntermediateFile(local); - - return local; - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - } - public void execute(File inputBam, File refFasta, File outputGvcf, boolean retainVcf, PipelineOutputTracker tracker, String binVersion, List extraArgs, PipelineContext ctx) throws PipelineJobException { File 
workDir = outputGvcf.getParentFile(); @@ -230,24 +199,17 @@ public void execute(File inputBam, File refFasta, File outputGvcf, boolean retai tracker.addIntermediateFile(new File(outputVcf.getPath() + ".tbi")); } - File inputBamLocal = ensureLocalCopy(inputBam, workDir, tracker); - ensureLocalCopy(SequenceUtil.getExpectedIndex(inputBam), workDir, tracker); - - File refFastaLocal = ensureLocalCopy(refFasta, workDir, tracker); - ensureLocalCopy(new File(refFasta.getPath() + ".fai"), workDir, tracker); - ensureLocalCopy(new File(FileUtil.getBaseName(refFasta.getPath()) + ".dict"), workDir, tracker); + List inputFiles = new ArrayList<>(); - File localBashScript = new File(workDir, "docker.sh"); - File dockerBashScript = new File(workDir, "dockerRun.sh"); - tracker.addIntermediateFile(localBashScript); - tracker.addIntermediateFile(dockerBashScript); + inputFiles.add(inputBam); + inputFiles.add(refFasta); List bashArgs = new ArrayList<>(Arrays.asList("/opt/deepvariant/bin/run_deepvariant")); bashArgs.add("--make_examples_extra_args='normalize_reads=true'"); - bashArgs.add("--ref=/work/" + refFastaLocal.getName()); - bashArgs.add("--reads=/work/" + inputBamLocal.getName()); - bashArgs.add("--output_gvcf=/work/" + outputGvcf.getName()); - bashArgs.add("--output_vcf=/work/" + outputVcf.getName()); + bashArgs.add("--ref=" + refFasta.getPath()); + bashArgs.add("--reads=" + inputBam.getPath()); + bashArgs.add("--output_gvcf=" + outputGvcf.getPath()); + bashArgs.add("--output_vcf=" + outputVcf.getPath()); Integer maxThreads = SequencePipelineService.get().getMaxThreads(getLogger()); if (maxThreads != null) { @@ -259,52 +221,9 @@ public void execute(File inputBam, File refFasta, File outputGvcf, boolean retai bashArgs.addAll(extraArgs); } - try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript); PrintWriter dockerWriter = PrintWriters.getPrintWriter(dockerBashScript)) - { - writer.println("#!/bin/bash"); - writer.println("set -x"); - writer.println("WD=`pwd`"); - 
writer.println("HOME=`echo ~/`"); - writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("sudo $DOCKER pull google/deepvariant:" + binVersion); - writer.println("sudo $DOCKER run --rm=true \\"); - writer.println("\t-v \"${WD}:/work\" \\"); - writer.println("\t-v \"${HOME}:/homeDir\" \\"); - ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); - if (!StringUtils.isEmpty(System.getenv("TMPDIR"))) - { - writer.println("\t-v \"${TMPDIR}:/tmp\" \\"); - } - writer.println("\t-u $UID \\"); - writer.println("\t-e USERID=$UID \\"); - writer.println("\t--entrypoint /bin/bash \\"); - writer.println("\t-w /work \\"); - Integer maxRam = SequencePipelineService.get().getMaxRam(); - if (maxRam != null) - { - writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM=" + maxRam + " \\"); - writer.println("\t--memory='" + maxRam + "g' \\"); - } - writer.println("\tgoogle/deepvariant:" + binVersion + " \\"); - writer.println("\t/work/" + dockerBashScript.getName()); - writer.println("EXIT_CODE=$?"); - writer.println("echo 'Docker run exit code: '$EXIT_CODE"); - writer.println("exit $EXIT_CODE"); - - dockerWriter.println("#!/bin/bash"); - dockerWriter.println("set -x"); - dockerWriter.println(StringUtils.join(bashArgs, " ")); - dockerWriter.println("EXIT_CODE=$?"); - dockerWriter.println("echo 'Exit code: '$?"); - dockerWriter.println("exit $EXIT_CODE"); - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - - setWorkingDir(workDir); - execute(Arrays.asList("/bin/bash", localBashScript.getPath())); + DockerWrapper wrapper = new DockerWrapper("google/deepvariant:" + binVersion, ctx.getLogger(), ctx); + wrapper.setEntryPoint("/bin/bash"); + wrapper.executeWithDocker(bashArgs, ctx.getWorkingDirectory(), tracker, inputFiles); if (!outputGvcf.exists()) { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java index 6e5e4320d..fe4b571c3 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java @@ -3,7 +3,6 @@ import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Interval; import htsjdk.variant.variantcontext.VariantContext; -import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Logger; import org.json.JSONArray; @@ -31,11 +30,9 @@ import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; -import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; -import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; +import org.labkey.api.sequenceanalysis.run.DockerWrapper; import org.labkey.api.util.FileUtil; import org.labkey.api.util.PageFlowUtil; -import org.labkey.api.writer.PrintWriters; import org.labkey.sequenceanalysis.SequenceAnalysisModule; import org.labkey.sequenceanalysis.SequenceAnalysisSchema; import org.labkey.sequenceanalysis.util.SequenceUtil; @@ -43,7 +40,6 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.io.PrintWriter; import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; @@ -141,69 +137,12 @@ public static File getJsonFile(File outputDir, File consensusFasta) public static File runNextClade(File consensusFasta, Logger log, PipelineOutputTracker tracker, File outputDir, PipelineContext ctx) throws PipelineJobException { - if (!consensusFasta.getParentFile().equals(outputDir)) - { - try - { - File consensusFastaLocal = new File(outputDir, consensusFasta.getName()); - log.info("Copying FASTA locally: " + consensusFastaLocal.getPath()); - 
FileUtils.copyFile(consensusFasta, consensusFastaLocal); - tracker.addIntermediateFile(consensusFastaLocal); - consensusFasta = consensusFastaLocal; - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - } - File jsonFile = getJsonFile(outputDir, consensusFasta); - File localBashScript = new File(outputDir, "dockerWrapper.sh"); - try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript)) - { - writer.println("#!/bin/bash"); - writer.println("set -x"); - writer.println("WD=`pwd`"); - writer.println("HOME=`echo ~/`"); - - writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("sudo $DOCKER pull nextstrain/nextclade:latest"); - writer.println("sudo $DOCKER run --rm=true \\"); - - if (SequencePipelineService.get().getMaxThreads(log) != null) - { - writer.println("\t-e SEQUENCEANALYSIS_MAX_THREADS \\"); - } - - Integer maxRam = SequencePipelineService.get().getMaxRam(); - if (maxRam != null) - { - writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM \\"); - writer.println("\t--memory='" + maxRam + "g' \\"); - } - - writer.println("\t-v \"${WD}:/work\" \\"); - ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); - writer.println("\t-u $UID \\"); - writer.println("\t-e USERID=$UID \\"); - writer.println("\t-w /work \\"); - writer.println("\tnextstrain/nextclade:latest \\"); - writer.println("\t/bin/bash -c \"nextclade dataset get --name='sars-cov-2' --output-dir='/work/data/sars-cov-2';nextclade run --input-dataset='/work/data/sars-cov-2' --output-json '/work/" + jsonFile.getName() + "' '" + consensusFasta.getName() + "'\" && rm -Rf /work/data"); - writer.println(""); - writer.println("echo 'Bash script complete'"); - writer.println(""); - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - - SimpleScriptWrapper rWrapper = new SimpleScriptWrapper(log); - rWrapper.setWorkingDir(outputDir); - rWrapper.execute(Arrays.asList("/bin/bash", localBashScript.getName())); + 
DockerWrapper wrapper = new DockerWrapper("nextstrain/nextclade:latest", ctx.getLogger(), ctx); + File dataDir = new File(outputDir, "data"); - tracker.addIntermediateFile(localBashScript); + wrapper.executeWithDocker(Arrays.asList("/bin/bash", "-c \"nextclade dataset get --name='sars-cov-2' --output-dir='" + dataDir.getPath() + "/sars-cov-2';nextclade run --input-dataset='" + dataDir.getPath() + "/sars-cov-2' --output-json '" + jsonFile.getPath() + "' '" + consensusFasta.getPath() + "'\" && rm -Rf " + dataDir), ctx.getWorkingDirectory(), tracker); if (!jsonFile.exists()) { @@ -219,7 +158,7 @@ private static JSONObject parseNextClade(File jsonFile, Logger log) throws Pipel { JSONObject results = new JSONObject(IOUtil.readFully(is)); JSONArray samples = results.getJSONArray("results"); - if (samples.length() == 0) + if (samples.isEmpty()) { log.info("No samples found in NextClade JSON, this probably means no clade was assigned"); return null; diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java index 56608dde2..2cccb8b9e 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java @@ -30,19 +30,16 @@ import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker; import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; -import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; -import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; +import org.labkey.api.sequenceanalysis.run.DockerWrapper; import org.labkey.api.util.FileUtil; import org.labkey.api.util.PageFlowUtil; -import org.labkey.api.writer.PrintWriters; import
org.labkey.sequenceanalysis.SequenceAnalysisModule; import org.labkey.sequenceanalysis.SequenceAnalysisSchema; import org.labkey.sequenceanalysis.util.SequenceUtil; import java.io.File; import java.io.IOException; -import java.io.PrintWriter; import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; @@ -266,67 +263,9 @@ public static File getRenamedPangolinOutput(File consensusFasta, PANGO_MODE mode private static File runUsingDocker(File outputDir, Logger log, File consensusFasta, PipelineOutputTracker tracker, List extraArgs, PipelineContext ctx) throws PipelineJobException { - if (!consensusFasta.getParentFile().equals(outputDir)) - { - try - { - File consensusFastaLocal = new File(outputDir, consensusFasta.getName()); - log.info("Copying FASTA locally: " + consensusFastaLocal.getPath()); - FileUtils.copyFile(consensusFasta, consensusFastaLocal); - tracker.addIntermediateFile(consensusFastaLocal); - consensusFasta = consensusFastaLocal; - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - } - - File localBashScript = new File(outputDir, "dockerWrapper.sh"); - try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript)) - { - writer.println("#!/bin/bash"); - writer.println("set -x"); - writer.println("WD=`pwd`"); - writer.println("HOME=`echo ~/`"); - - writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("sudo $DOCKER pull ghcr.io/bimberlabinternal/pangolin:latest"); - writer.println("sudo $DOCKER run --rm=true \\"); - - if (SequencePipelineService.get().getMaxThreads(log) != null) - { - writer.println("\t-e SEQUENCEANALYSIS_MAX_THREADS \\"); - } - - Integer maxRam = SequencePipelineService.get().getMaxRam(); - if (maxRam != null) - { - writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM \\"); - writer.println("\t--memory='" + maxRam + "g' \\"); - } - - String extraArgString = extraArgs == null ? 
"" : " " + StringUtils.join(extraArgs, " "); - writer.println("\t-v \"${WD}:/work\" \\"); - ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); - writer.println("\t-u $UID \\"); - writer.println("\t-e USERID=$UID \\"); - writer.println("\t-w /work \\"); - writer.println("\tghcr.io/bimberlabinternal/pangolin:latest \\"); - writer.println("\tpangolin" + extraArgString + " '/work/" + consensusFasta.getName() + "'"); - writer.println(""); - writer.println("echo 'Bash script complete'"); - writer.println(""); - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - - SimpleScriptWrapper rWrapper = new SimpleScriptWrapper(log); - rWrapper.setWorkingDir(outputDir); - rWrapper.execute(Arrays.asList("/bin/bash", localBashScript.getName())); - tracker.addIntermediateFile(localBashScript); + DockerWrapper wrapper = new DockerWrapper("ghcr.io/bimberlabinternal/pangolin:latest", ctx.getLogger(), ctx); + String extraArgString = extraArgs == null ? "" : " " + StringUtils.join(extraArgs, " "); + wrapper.executeWithDocker(Arrays.asList("pangolin" + extraArgString + " '" + consensusFasta.getPath() + "'"), ctx.getWorkingDirectory(), tracker); File output = new File(outputDir, "lineage_report.csv"); if (!output.exists()) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java index f30999c92..8252c7876 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/FastqcRunner.java @@ -374,7 +374,8 @@ private List getBaseParams() throws FileNotFoundException throw new RuntimeException("Not found: " + htsjdkJar.getPath()); } - File commonsMath = new File(libDir, "commons-math3-3.6.1.jar"); + File apiLibDir = new File(ModuleLoader.getInstance().getModule("api").getExplodedPath(), "lib"); + File commonsMath = new File(apiLibDir, "commons-math3-3.6.1.jar"); if 
(!commonsMath.exists()) { throw new RuntimeException("Not found: " + commonsMath.getPath()); @@ -386,13 +387,6 @@ private List getBaseParams() throws FileNotFoundException throw new RuntimeException("Not found: " + jhdf5.getPath()); } - // NOTE: FastQC expects an alternate package name within this JAR, so use their packaged code instead: -// File base64 = new File(libDir, "base64-2.3.8.jar"); -// if (!base64.exists()) -// { -// throw new RuntimeException("Not found: " + base64.getPath()); -// } - List classPath = new ArrayList<>(); classPath.add("."); classPath.add(fastqcDir.getPath()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java index aa7d6da7b..62c4a3c0e 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java @@ -13,6 +13,7 @@ import org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.sequenceanalysis.SequenceAnalysisService; import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.CommandLineParam; import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider; import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; @@ -43,8 +44,11 @@ public Provider() ToolParameterDescriptor.create("limitToChromosomes", "Limit to Chromosomes", "If checked, the analysis will include only the primary chromosomes", "checkbox", new JSONObject() {{ put("checked", true); - }}, true) - ), null, "https://www.kingrelatedness.com/manual.shtml"); + }}, true), + ToolParameterDescriptor.create("excludedContigs", "Excluded Contigs", "A comma separated list of contigs to exclude, such as X,Y,MT.", "textfield", new JSONObject(){{ + + }}, "X,Y,MT") + ), 
null, "https://www.kingrelatedness.com/manual.shtml"); } @Override @@ -90,9 +94,9 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno return NumberUtils.isCreatable(name) || "X".equalsIgnoreCase(name) || "Y".equalsIgnoreCase(name); }).map(SAMSequenceRecord::getSequenceName).toList(); - if (toKeep.size() == 0) + if (toKeep.isEmpty()) { - getPipelineCtx().getLogger().info("The option to limit to chromosomes was selected, but no contigs were foudn with numeric names or names beginning with chr. All contigs will be used."); + getPipelineCtx().getLogger().info("The option to limit to chromosomes was selected, but no contigs were found with numeric names or names beginning with chr. All contigs will be used."); } else { @@ -101,6 +105,13 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno } } + String excludedContigs = StringUtils.trimToNull(getProvider().getParameterByName("excludedContigs").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class)); + if (excludedContigs != null) + { + plinkArgs.add("--not-chr"); + plinkArgs.add(excludedContigs); + } + plinkArgs.add("--allow-extra-chr"); plinkArgs.add("--silent"); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SplitVcfBySamplesStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SplitVcfBySamplesStep.java index 7191b8d8b..20c076c54 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SplitVcfBySamplesStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SplitVcfBySamplesStep.java @@ -90,7 +90,7 @@ private List findProducedVcfs(File inputVCF, File outputDirectory) } @Override - public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, TaskFileManager manager, ReferenceGenome genome, List orderedScatterOutputs, List orderedJobDirs) throws PipelineJobException + public void 
performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, ReferenceGenome genome, List orderedScatterOutputs, List orderedJobDirs) throws PipelineJobException { job.getLogger().info("Merging additional track VCFs"); File inputVCF = ((SequenceJob)getPipelineCtx().getJob()).getInputFiles().get(0); @@ -133,7 +133,7 @@ public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, Pi so.setFile(combined); so.setCategory("VCF File"); so.setLibrary_id(genome.getGenomeId()); - manager.addSequenceOutput(so); + ctx.getFileManager().addSequenceOutput(so); } } diff --git a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java index 52e9ae06c..017ce8e46 100644 --- a/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java +++ b/cluster/src/org/labkey/cluster/pipeline/SlurmExecutionEngine.java @@ -1,7 +1,6 @@ package org.labkey.cluster.pipeline; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.math.NumberUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.NotNull; @@ -14,15 +13,21 @@ import org.labkey.api.data.ContainerManager; import org.labkey.api.pipeline.PipelineJob; import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.pipeline.PipelineService; +import org.labkey.api.pipeline.PipelineStatusFile; import org.labkey.api.util.FileUtil; import org.labkey.api.util.Pair; +import org.labkey.api.writer.PrintWriters; import org.labkey.cluster.ClusterManager; import org.labkey.cluster.ClusterServiceImpl; import org.quartz.JobExecutionException; import java.io.File; +import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.IOException; +import java.io.PrintWriter; +import java.nio.file.StandardOpenOption; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -242,6 +247,8 @@ protected 
Pair getStatusForJob(ClusterJob job, Container c) int stateIdx = -1; int hostnameIdx = -1; int maxRssIdx = -1; + int reqMemIdx = -1; + String reqMem = null; for (String line : ret) { line = StringUtils.trimToNull(line); @@ -258,6 +265,7 @@ protected Pair getStatusForJob(ClusterJob job, Container c) stateIdx = header.indexOf("STATE"); hostnameIdx = header.indexOf("NODELIST"); maxRssIdx = header.indexOf("MAXRSS"); + reqMemIdx = header.indexOf("REQMEM"); if (stateIdx == -1) { @@ -298,18 +306,49 @@ else if (headerFound) } } + if (reqMemIdx > -1 && reqMemIdx < tokens.length) + { + String val = StringUtils.trimToNull(tokens[reqMemIdx]); + if (val != null) + { + reqMem = val; + } + + } + // NOTE: if the line has blank ending columns, trimmed lines might lack that value - if (maxRssIdx > -1 && maxRssIdx < tokens.length) + if ((job.getClusterId() + ".0").equals(id) && maxRssIdx > -1 && maxRssIdx < tokens.length) { try { - if (NumberUtils.isCreatable(tokens[maxRssIdx])) + String maxRSS = StringUtils.trimToNull(tokens[maxRssIdx]); + if (maxRSS != null) { - long bytes = FileSizeFormatter.convertStringRepresentationToBytes(tokens[maxRssIdx]); - long requestInBytes = FileSizeFormatter.convertStringRepresentationToBytes(getConfig().getRequestMemory() + "G"); //request is always GB - if (bytes > requestInBytes) + double bytes = FileSizeFormatter.convertStringRepresentationToBytes(maxRSS); + if (reqMem == null) + { + _log.warn("Unable to find ReqMem for slurm job: " + job.getClusterId()); + } + else { - info = "Job exceeded memory, max was: " + FileSizeFormatter.convertBytesToUnit(bytes, 'G') + "G"; + double requestInBytes = FileSizeFormatter.convertStringRepresentationToBytes(reqMem); + if (bytes > requestInBytes) + { + info = "Job exceeded memory, max was: " + FileSizeFormatter.convertBytesToUnit(bytes, 'G') + "G, requested memory was: " + FileSizeFormatter.convertBytesToUnit(requestInBytes, 'G'); + + PipelineStatusFile sf = PipelineService.get().getStatusFile(job.getJobId()); + 
if (sf != null) + { + try (PrintWriter writer = PrintWriters.getPrintWriter(new File(sf.getFilePath()), StandardOpenOption.APPEND)) + { + writer.println(info + ". Raw slurm value: " + maxRSS); + } + catch (FileNotFoundException e) + { + _log.error("Unable to find log file for job, " + job.getJobId() + ": " + sf.getFilePath()); + } + } + } } } } @@ -760,13 +799,13 @@ private Pair getStatusFromQueue(ClusterJob job) // Based on: https://stackoverflow.com/questions/3758606/how-can-i-convert-byte-size-into-a-human-readable-format-in-java private static class FileSizeFormatter { - public static long convertStringRepresentationToBytes(final String value) + public static double convertStringRepresentationToBytes(final String value) { try { char unit = value.toUpperCase().charAt(value.length() - 1); long sizeFactor = getSizeFactor(unit); - long size = Long.parseLong(value.substring(0, value.length() - 1)); + double size = Double.parseDouble(value.substring(0, value.length() - 1)); return size * sizeFactor; } @@ -776,11 +815,11 @@ public static long convertStringRepresentationToBytes(final String value) } } - public static long convertBytesToUnit(final long bytes, final char unit) + public static double convertBytesToUnit(final double bytes, final char unit) { long sizeFactor = getSizeFactor(unit); - return bytes / sizeFactor; + return bytes / (double)sizeFactor; } private static long getSizeFactor(char unit) @@ -806,11 +845,11 @@ public static class TestCase @Test public void testFileSizeFormatter() { - long bytes = FileSizeFormatter.convertStringRepresentationToBytes("1362624K"); - Assert.assertEquals("Incorrect byte value", 1395326976, bytes); + double bytes = FileSizeFormatter.convertStringRepresentationToBytes("1362624K"); + Assert.assertEquals("Incorrect byte value", 1395326976.0, bytes, 0.0); - long val2 = FileSizeFormatter.convertBytesToUnit(bytes, 'K'); - Assert.assertEquals("Incorrect string value", 1362624, val2); + double val2 = 
FileSizeFormatter.convertBytesToUnit(bytes, 'K'); + Assert.assertEquals("Incorrect string value", 1362624.0, val2, 0.0); } } } diff --git a/jbrowse/src/client/JBrowse/Browser/Browser.tsx b/jbrowse/src/client/JBrowse/Browser/Browser.tsx index 90b7033ec..58ca9f155 100644 --- a/jbrowse/src/client/JBrowse/Browser/Browser.tsx +++ b/jbrowse/src/client/JBrowse/Browser/Browser.tsx @@ -50,7 +50,7 @@ function View(){ return ( //TODO: can we make this expand to full page height? -
+
diff --git a/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java b/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java index cfd24d865..a6f314dd3 100644 --- a/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java +++ b/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java @@ -153,7 +153,7 @@ public static CellHashingService.CellHashingParameters createFromStep(SequenceOu if (methodStr2 != null) { ret.consensusMethods = extractMethods(methodStr2); - if (!ret.methods.containsAll(ret.consensusMethods)) + if (!new HashSet<>(ret.methods).containsAll(ret.consensusMethods)) { throw new PipelineJobException("All consensusMethods must be present in methods: " + methodStr2); } @@ -189,7 +189,7 @@ public static CellHashingParameters createFromJson(BARCODE_TYPE type, File webse if (ret.consensusMethods != null && !ret.consensusMethods.isEmpty()) { - if (!ret.methods.containsAll(ret.consensusMethods)) + if (!new HashSet<>(ret.methods).containsAll(ret.consensusMethods)) { throw new PipelineJobException("All consensusMethods must be present in methods: " + ret.consensusMethods.stream().map(CALLING_METHOD::name).collect(Collectors.joining(","))); } @@ -326,6 +326,7 @@ public Set getAllowableBarcodeNames() throws PipelineJobException public enum CALLING_METHOD { multiseq(true, false), + multiseqOnLargeData(true, true, false, 10000, "multiseq"), htodemux(false, false), dropletutils(true, true), gmm_demux(true, true), @@ -337,6 +338,8 @@ public enum CALLING_METHOD boolean isDefaultRun; boolean isDefaultConsensus; boolean requiresH5; + int minCells; + String label; CALLING_METHOD(boolean isDefaultRun, boolean isDefaultConsensus) { @@ -344,10 +347,17 @@ public enum CALLING_METHOD } CALLING_METHOD(boolean isDefaultRun, boolean isDefaultConsensus, boolean requiresH5) + { + this(isDefaultRun, isDefaultConsensus, requiresH5, 0, null); + } + + CALLING_METHOD(boolean isDefaultRun, boolean isDefaultConsensus, boolean requiresH5, 
int minCells, String label) { this.isDefaultRun = isDefaultRun; this.isDefaultConsensus = isDefaultConsensus; this.requiresH5 = requiresH5; + this.minCells = minCells; + this.label = label; } public boolean isDefaultRun() @@ -360,6 +370,16 @@ public boolean isDefaultConsensus() return isDefaultConsensus; } + public int getMinCells() + { + return minCells; + } + + public String getLabel() + { + return label == null ? name() : label; + } + public boolean isRequiresH5() { return requiresH5; diff --git a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java index e5412f2d3..d24f1abf1 100644 --- a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java +++ b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java @@ -17,7 +17,7 @@ import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; -import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; +import org.labkey.api.sequenceanalysis.run.DockerWrapper; import org.labkey.api.util.FileUtil; import org.labkey.api.writer.PrintWriters; @@ -29,6 +29,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -271,7 +272,7 @@ protected boolean hasCompleted() protected void executeR(SequenceOutputHandler.JobContext ctx, File rmd, String outputPrefix) throws PipelineJobException { List lines = new ArrayList<>(); - lines.add("rmarkdown::render(output_file = '" + getExpectedHtmlFile(ctx, outputPrefix).getName() + "', input = '" + rmd.getName() + "', intermediates_dir = '/work')"); + lines.add("rmarkdown::render(output_file = '" + 
getExpectedHtmlFile(ctx, outputPrefix).getName() + "', input = '" + rmd.getName() + "', intermediates_dir = '" + ctx.getWorkingDirectory() + "')"); lines.add("print('Rmarkdown complete')"); lines.add(""); @@ -287,7 +288,7 @@ protected void executeR(SequenceOutputHandler.JobContext ctx, File rmd, String o seuratThreads = getProvider().getParameterByName(SEURAT_THREADS).extractValue(ctx.getJob(), getProvider(), getStepIdx(), Integer.class, null); } - executeR(ctx, getDockerContainerName(), outputPrefix, lines, seuratThreads, getDockerHomeDir()); + executeR(ctx, getDockerContainerName(), outputPrefix, lines, seuratThreads, getDockerHomeDir(), getAdditionalDockerInputs(ctx)); handlePossibleFailure(ctx, outputPrefix); } @@ -299,8 +300,14 @@ protected static SeuratToolParameter getSeuratThreadsParam() }}, null); } - public static void executeR(SequenceOutputHandler.JobContext ctx, String dockerContainerName, String outputPrefix, List lines, @Nullable Integer seuratThreads, @Nullable String dockerHomeDir) throws PipelineJobException + public static void executeR(SequenceOutputHandler.JobContext ctx, String dockerContainerName, String outputPrefix, List lines, @Nullable Integer seuratThreads, @Nullable String alternateUserHomeDir, Collection additionalDockerInputs) throws PipelineJobException { + DockerWrapper wrapper = new DockerWrapper(dockerContainerName, ctx.getLogger(), ctx); + if (alternateUserHomeDir != null) + { + wrapper.setAlternateUserHome(alternateUserHomeDir); + } + File localRScript = new File(ctx.getOutputDir(), FileUtil.makeLegalName(outputPrefix + ".R").replaceAll(" ", "_")); try (PrintWriter writer = PrintWriters.getPrintWriter(localRScript)) { @@ -311,74 +318,24 @@ public static void executeR(SequenceOutputHandler.JobContext ctx, String dockerC throw new PipelineJobException(e); } - File localBashScript = new File(ctx.getOutputDir(), "dockerWrapper.sh"); - try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript)) + if (seuratThreads != 
null) { - writer.println("#!/bin/bash"); - writer.println("set -x"); - writer.println("WD=`pwd`"); - writer.println("HOME=`echo ~/`"); - - writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("sudo $DOCKER pull " + dockerContainerName); - writer.println("sudo $DOCKER run --rm=true \\"); - Integer maxThreads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); - if (maxThreads != null) - { - writer.println("\t-e SEQUENCEANALYSIS_MAX_THREADS=" + maxThreads + " \\"); - } - - if (seuratThreads != null) - { - if (maxThreads != null && maxThreads < seuratThreads) - { - seuratThreads = maxThreads; - } - - writer.println("\t-e SEURAT_MAX_THREADS=" + seuratThreads + " \\"); - } - - Integer maxRam = SequencePipelineService.get().getMaxRam(); - if (maxRam != null) + if (maxThreads != null && maxThreads < seuratThreads) { - //int swap = 4*maxRam; - writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM=" + maxRam + " \\"); - writer.println("\t--memory='" + maxRam + "g' \\"); + seuratThreads = maxThreads; } - File tmpDir = new File(SequencePipelineService.get().getJavaTempDir()); - writer.println("\t-v \"${WD}:/work\" \\"); - writer.println("\t-v \"" + tmpDir.getPath() + ":/tmp\" \\"); - ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); - writer.println("\t-v \"${HOME}:/homeDir\" \\"); - writer.println("\t-u $UID \\"); - writer.println("\t-e USERID=$UID \\"); - writer.println("\t-e TMPDIR=/tmp \\"); - if (dockerHomeDir != null) - { - writer.println("\t-e HOME=" + dockerHomeDir + " \\"); - } - writer.println("\t-w /work \\"); - //NOTE: this seems to disrupt packages installed into home - //writer.println("\t-e HOME=/homeDir \\"); - writer.println("\t" + dockerContainerName + " \\"); - writer.println("\tRscript --vanilla '" + localRScript.getName() + "'"); - writer.println("EXIT_CODE=$?"); - writer.println("echo 'Bash script complete: '$EXIT_CODE"); - writer.println("exit $EXIT_CODE"); - } - catch (IOException e) 
- { - throw new PipelineJobException(e); + wrapper.addToDockerEnvironment("SEURAT_MAX_THREADS", seuratThreads.toString()); } - SimpleScriptWrapper rWrapper = new SimpleScriptWrapper(ctx.getLogger()); - rWrapper.setWorkingDir(ctx.getOutputDir()); - rWrapper.execute(Arrays.asList("/bin/bash", localBashScript.getName())); + File tmpDir = new File(SequencePipelineService.get().getJavaTempDir()); + wrapper.setTmpDir(tmpDir); + + wrapper.setWorkingDir(ctx.getOutputDir()); + wrapper.executeWithDocker(Arrays.asList("Rscript", "--vanilla", "'" + localRScript.getName() + "'"), ctx.getWorkingDirectory(), ctx.getFileManager(), additionalDockerInputs); localRScript.delete(); - localBashScript.delete(); } public String getDockerHomeDir() @@ -487,7 +444,7 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List getAdditionalDockerInputs(SequenceOutputHandler.JobContext ctx) throws PipelineJobException + { + return Collections.emptySet(); + } + protected String printInputFile(SeuratObjectWrapper so) { return "'" + so.getFile().getName() + "'"; diff --git a/singlecell/resources/chunks/AppendMetadata.R b/singlecell/resources/chunks/AppendMetadata.R index 406e02395..addefee43 100644 --- a/singlecell/resources/chunks/AppendMetadata.R +++ b/singlecell/resources/chunks/AppendMetadata.R @@ -1,9 +1,10 @@ -if (!file.exists('/homeDir/.netrc')) { - print(list.files('/homeDir')) - stop('Unable to find file: /homeDir/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) } -invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) for (datasetId in names(seuratObjects)) { diff --git a/singlecell/resources/chunks/AppendNimble.R b/singlecell/resources/chunks/AppendNimble.R index 
040317e71..7a913d757 100644 --- a/singlecell/resources/chunks/AppendNimble.R +++ b/singlecell/resources/chunks/AppendNimble.R @@ -1,9 +1,10 @@ -if (!file.exists('/homeDir/.netrc')) { - print(list.files('/homeDir')) - stop('Unable to find file: /homeDir/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) } -invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) # NOTE: this file is created by DownloadAndAppendNimble if there was an error. It might exist if a job failed and then was restarted diff --git a/singlecell/resources/chunks/AppendTcr.R b/singlecell/resources/chunks/AppendTcr.R index b4c5c41a6..bc6898617 100644 --- a/singlecell/resources/chunks/AppendTcr.R +++ b/singlecell/resources/chunks/AppendTcr.R @@ -1,9 +1,10 @@ -if (!file.exists('/homeDir/.netrc')) { - print(list.files('/homeDir')) - stop('Unable to find file: /homeDir/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) } -invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) for (datasetId in names(seuratObjects)) { diff --git a/singlecell/resources/chunks/AvgExpression.R b/singlecell/resources/chunks/AvgExpression.R index 78426ff12..5218624f6 100644 --- a/singlecell/resources/chunks/AvgExpression.R +++ b/singlecell/resources/chunks/AvgExpression.R @@ -1,9 +1,10 @@ -if (!file.exists('/homeDir/.netrc')) { - print(list.files('/homeDir')) - stop('Unable to find file: /homeDir/.netrc') +netRc <- 
paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) } -invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) GenerateAveragedData <- function(seuratObj, groupFields, addMetadata) { diff --git a/singlecell/resources/chunks/ClassifyTNKByExpression.R b/singlecell/resources/chunks/ClassifyTNKByExpression.R index 2e3f7bb1a..e7a00f83c 100644 --- a/singlecell/resources/chunks/ClassifyTNKByExpression.R +++ b/singlecell/resources/chunks/ClassifyTNKByExpression.R @@ -1,9 +1,10 @@ -if (!file.exists('/homeDir/.netrc')) { - print(list.files('/homeDir')) - stop('Unable to find file: /homeDir/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) } -invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) for (datasetId in names(seuratObjects)) { diff --git a/singlecell/resources/chunks/FindClustersAndDimRedux.R b/singlecell/resources/chunks/FindClustersAndDimRedux.R index f002dd905..1aadac4c8 100644 --- a/singlecell/resources/chunks/FindClustersAndDimRedux.R +++ b/singlecell/resources/chunks/FindClustersAndDimRedux.R @@ -1,3 +1,20 @@ +if (!reticulate::py_module_available(module = 'leidenalg')) { + logger::log_warn('python leidenalg not found!') + logger::log_warn(paste0('Python available: ', reticulate::py_available())) + logger::log_warn('Python config') + pyConfig <- reticulate::py_config() + for (pn in names(pyConfig)) { + logger::log_warn(paste0(pn, ': ', paste0(pyConfig[[pn]]), collapse = ',')) + } + + 
logger::log_warn(paste0('pythonpath: ', reticulate::py_config()$pythonpath)) + + logger::log_warn('Python packages:') + for (pn in reticulate::py_list_packages()$package) { + logger::log_warn(pn) + } +} + for (datasetId in names(seuratObjects)) { printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) diff --git a/singlecell/resources/chunks/RunConga.R b/singlecell/resources/chunks/RunConga.R index 36091ee4e..12414779f 100644 --- a/singlecell/resources/chunks/RunConga.R +++ b/singlecell/resources/chunks/RunConga.R @@ -1,9 +1,10 @@ -if (!file.exists('/homeDir/.netrc')) { - print(list.files('/homeDir')) - stop('Unable to find file: /homeDir/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) } -invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) for (datasetId in names(seuratObjects)) { diff --git a/singlecell/resources/chunks/RunScMetabolism.R b/singlecell/resources/chunks/RunScMetabolism.R deleted file mode 100644 index 3eabe5611..000000000 --- a/singlecell/resources/chunks/RunScMetabolism.R +++ /dev/null @@ -1,14 +0,0 @@ -for (datasetId in names(seuratObjects)) { - printName(datasetId) - seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) - - for (metabolismType in metabolismTypes) { - seuratObj <- CellMembrane::RunScMetabolism(seuratObj, metabolismType = metabolismType) - } - - saveData(seuratObj, datasetId) - - # Cleanup - rm(seuratObj) - gc() -} \ No newline at end of file diff --git a/singlecell/resources/chunks/StudyMetadata.R b/singlecell/resources/chunks/StudyMetadata.R index ee0726402..fd8d4e931 100644 --- a/singlecell/resources/chunks/StudyMetadata.R +++ b/singlecell/resources/chunks/StudyMetadata.R @@ -1,9 +1,10 @@ -if 
(!file.exists('/homeDir/.netrc')) { - print(list.files('/homeDir')) - stop('Unable to find file: /homeDir/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) } -invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) for (datasetId in names(seuratObjects)) { diff --git a/singlecell/resources/chunks/SummarizeTCellActivation.R b/singlecell/resources/chunks/SummarizeTCellActivation.R index ae7fa52d8..b03c351ee 100644 --- a/singlecell/resources/chunks/SummarizeTCellActivation.R +++ b/singlecell/resources/chunks/SummarizeTCellActivation.R @@ -1,9 +1,10 @@ -if (!file.exists('/homeDir/.netrc')) { - print(list.files('/homeDir')) - stop('Unable to find file: /homeDir/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) } -invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) for (datasetId in names(seuratObjects)) { diff --git a/singlecell/resources/chunks/TrainCelltypist.R b/singlecell/resources/chunks/TrainCelltypist.R index 543c51f85..8f6866d10 100644 --- a/singlecell/resources/chunks/TrainCelltypist.R +++ b/singlecell/resources/chunks/TrainCelltypist.R @@ -6,4 +6,4 @@ datasetId <- names(seuratObjects)[[1]] printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) -RIRA::TrainCellTypist(seuratObj, labelField = labelField, minCellsPerClass = minCellsPerClass, excludedClasses = excludedClasses, modelFile = modelFile, featureInclusionList = featureInclusionList, 
featureExclusionList = featureExclusionList, tempFileLocation = '/work') \ No newline at end of file +RIRA::TrainCellTypist(seuratObj, labelField = labelField, minCellsPerClass = minCellsPerClass, excludedClasses = excludedClasses, modelFile = modelFile, featureInclusionList = featureInclusionList, featureExclusionList = featureExclusionList, tempFileLocation = Sys.getenv('WORK_DIR')) \ No newline at end of file diff --git a/singlecell/resources/chunks/UpdateSeuratPrototype.R b/singlecell/resources/chunks/UpdateSeuratPrototype.R index e9af18a97..a30396d81 100644 --- a/singlecell/resources/chunks/UpdateSeuratPrototype.R +++ b/singlecell/resources/chunks/UpdateSeuratPrototype.R @@ -1,9 +1,10 @@ -if (!file.exists('/homeDir/.netrc')) { - print(list.files('/homeDir')) - stop('Unable to find file: /homeDir/.netrc') +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) } -invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) for (datasetId in names(seuratObjects)) { diff --git a/singlecell/resources/queries/singlecell/duplicatePrototypes.query.xml b/singlecell/resources/queries/singlecell/duplicatePrototypes.query.xml index 20387aff2..96e1ebab2 100644 --- a/singlecell/resources/queries/singlecell/duplicatePrototypes.query.xml +++ b/singlecell/resources/queries/singlecell/duplicatePrototypes.query.xml @@ -2,7 +2,7 @@ - Duplicated Seurat Object Prototypes + Duplicated Seurat Object Prototypes and Loupe Files Readset Id diff --git a/singlecell/resources/queries/singlecell/duplicatePrototypes.sql b/singlecell/resources/queries/singlecell/duplicatePrototypes.sql index 99cd94576..ee5a3c928 100644 --- a/singlecell/resources/queries/singlecell/duplicatePrototypes.sql +++ 
b/singlecell/resources/queries/singlecell/duplicatePrototypes.sql @@ -2,9 +2,24 @@ SELECT o.readset, min(o.rowId) as minRowId, min(o.analysis_id) as minAnalysisId, - count(*) as totalPrototypes + count(*) as totalPrototypes, + o.category FROM sequenceanalysis.outputfiles o WHERE o.category = 'Seurat Object Prototype' -GROUP BY o.readset +GROUP BY o.readset, o.category +HAVING COUNT(*) > 1 + +UNION ALL + +SELECT + o.readset, + min(o.rowId) as minRowId, + min(o.analysis_id) as minAnalysisId, + count(*) as totalPrototypes, + o.category + +FROM sequenceanalysis.outputfiles o +WHERE o.category = '10x Loupe File' +GROUP BY o.readset, o.category HAVING COUNT(*) > 1 \ No newline at end of file diff --git a/singlecell/resources/views/singleCellDataManagement.html b/singlecell/resources/views/singleCellDataManagement.html index aafee8cb5..cfda8679e 100644 --- a/singlecell/resources/views/singleCellDataManagement.html +++ b/singlecell/resources/views/singleCellDataManagement.html @@ -130,7 +130,7 @@ queryName: 'stalePrototypes' }) }, { - name: 'Duplicate Seurat Object Prototypes', + name: 'Duplicate Seurat Object Prototypes and Loupe Files', url: LABKEY.ActionURL.buildURL('query', 'executeQuery.view', null, { schemaName: 'singlecell', queryName: 'duplicatePrototypes' diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index 6aab5bcdc..81629915c 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -31,14 +31,16 @@ import org.labkey.api.security.User; import org.labkey.api.sequenceanalysis.SequenceOutputFile; import org.labkey.api.sequenceanalysis.model.Readset; +import org.labkey.api.sequenceanalysis.pipeline.DefaultPipelineStepOutput; import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker; +import 
org.labkey.api.sequenceanalysis.pipeline.PipelineStepOutput; import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; -import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; +import org.labkey.api.sequenceanalysis.run.DockerWrapper; import org.labkey.api.singlecell.CellHashingService; import org.labkey.api.singlecell.model.CDNA_Library; import org.labkey.api.singlecell.model.Sample; @@ -67,6 +69,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.labkey.singlecell.run.CellRangerGexCountStep.LOUPE_CATEGORY; @@ -1142,50 +1145,6 @@ else if ("Negative".equals(line[htoClassIdx])) } } - public File ensureLocalCopy(File input, File outputDir, Logger log, Set toDelete) throws PipelineJobException - { - if (!outputDir.equals(input.getParentFile())) - { - try - { - //needed for docker currently - log.debug("Copying file to working directory: " + input.getPath()); - File dest = new File(outputDir, input.getName()); - if (dest.exists()) - { - log.debug("deleting existing folder: " + dest.getPath()); - if (input.isDirectory()) - { - FileUtils.deleteDirectory(dest); - } - else - { - dest.delete(); - } - } - - if (input.isDirectory()) - { - FileUtils.copyDirectory(input, dest); - } - else - { - FileUtils.copyFile(input, dest); - } - - toDelete.add(dest); - - return dest; - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - } - - return input; - } - private File getExpectedCallsFile(File outputDir, String basename) { return new File(outputDir, basename + CALL_EXTENSION); @@ -1200,24 +1159,20 @@ public File 
generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St { log.debug("generating final calls from folder: " + citeSeqCountOutDir.getPath()); - Set toDelete = new HashSet<>(); - - SimpleScriptWrapper rWrapper = new SimpleScriptWrapper(log); - rWrapper.setWorkingDir(outputDir); + List inputFiles = new ArrayList<>(); File molInfo = getMolInfoFileFromCounts(citeSeqCountOutDir); if (!molInfo.exists()) { throw new PipelineJobException("File not found, cannot calculate saturation: " + molInfo.getPath()); } - - molInfo = ensureLocalCopy(molInfo, outputDir, log, toDelete); + inputFiles.add(molInfo); // h5 file used by demuxEM/demuxmix: - File h5 = null; - if (parameters.h5File != null) + File h5 = parameters.h5File; + if (h5 != null) { - h5 = ensureLocalCopy(parameters.h5File, outputDir, log, toDelete); + inputFiles.add(h5); } if (CALLING_METHOD.requiresH5(parameters.methods) && h5 == null) @@ -1225,18 +1180,14 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St throw new PipelineJobException("No h5 file provided, but demuxEM/demuxmix was specified"); } - citeSeqCountOutDir = ensureLocalCopy(citeSeqCountOutDir, outputDir, log, toDelete); - - File cellBarcodeWhitelistFile = parameters.cellBarcodeWhitelistFile; - if (cellBarcodeWhitelistFile != null) + if (citeSeqCountOutDir != null) { - cellBarcodeWhitelistFile = ensureLocalCopy(cellBarcodeWhitelistFile, outputDir, log, toDelete); - } - else - { - log.debug("No cell barcode whitelist provided"); + inputFiles.add(citeSeqCountOutDir); } + File cellBarcodeWhitelistFile = parameters.cellBarcodeWhitelistFile; + inputFiles.add(cellBarcodeWhitelistFile); + File htmlFile = new File(outputDir, basename + ".html"); File localHtml = new File(localPipelineDir, htmlFile.getName()); @@ -1264,24 +1215,55 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St File localRScript = new File(outputDir, "generateCallsWrapper.R"); try (PrintWriter writer = 
PrintWriters.getPrintWriter(localRScript)) { - List methodNames = parameters.methods.stream().map(Enum::name).collect(Collectors.toList()); - List consensusMethodNames = parameters.consensusMethods == null ? Collections.emptyList() : parameters.consensusMethods.stream().map(Enum::name).collect(Collectors.toList()); - String cellbarcodeWhitelist = cellBarcodeWhitelistFile != null ? "'/work/" + cellBarcodeWhitelistFile.getName() + "'" : "NULL"; + String cellbarcodeWhitelist = cellBarcodeWhitelistFile != null ? "'" + cellBarcodeWhitelistFile.getPath() + "'" : "NULL"; + long totalCellBarcodes; + if (cellBarcodeWhitelistFile != null) + { + try (Stream st = Files.lines(cellBarcodeWhitelistFile.toPath())) + { + totalCellBarcodes = st.count(); + } + } + else + { + totalCellBarcodes = 99999L; + } + ctx.getLogger().debug("Total input cell barcodes: " + totalCellBarcodes); Set allowableBarcodes = parameters.getAllowableBarcodeNames(); String allowableBarcodeParam = allowableBarcodes != null ? "c('" + StringUtils.join(allowableBarcodes, "','") + "')" : "NULL"; + List methodNames = parameters.methods.stream().filter(m -> { + if (totalCellBarcodes < m.getMinCells()) + { + ctx.getLogger().debug("Dropping method due to insufficient cells: " + m.name()); + return false; + } + + return true; + }).map(CALLING_METHOD::getLabel).distinct().toList(); + + List consensusMethodNames = parameters.consensusMethods == null ? Collections.emptyList() : parameters.consensusMethods.stream().filter(m -> { + if (totalCellBarcodes < m.getMinCells()) + { + ctx.getLogger().debug("Dropping consensus method due to insufficient cells: " + m.name()); + return false; + } + + return true; + }).map(CALLING_METHOD::getLabel).distinct().toList(); + String skipNormalizationQcString = parameters.skipNormalizationQc ? "TRUE" : "FALSE"; String keepMarkdown = parameters.keepMarkdown ? "TRUE" : "FALSE"; String doTSNE = parameters.doTSNE ? "TRUE" : "FALSE"; - String h5String = h5 == null ? 
"" : ", rawFeatureMatrixH5 = '/work/" + h5.getName() + "'"; + String h5String = h5 == null ? "" : ", rawFeatureMatrixH5 = '" + h5.getPath() + "'"; String consensusMethodString = consensusMethodNames.isEmpty() ? "" : ", methodsForConsensus = c('" + StringUtils.join(consensusMethodNames, "','") + "')"; - writer.println("f <- cellhashR::CallAndGenerateReport(rawCountData = '/work/" + citeSeqCountOutDir.getName() + "'" + h5String + - ", molInfoFile = '/work/" + molInfo.getName() + "'" + - ", reportFile = '/work/" + htmlFile.getName() + "'" + - ", callFile = '/work/" + callsFile.getName() + "'" + - ", metricsFile = '/work/" + metricsFile.getName() + "'" + - ", rawCountsExport = '/work/" + countFile.getName() + "'" + + writer.println("f <- cellhashR::CallAndGenerateReport(rawCountData = '" + citeSeqCountOutDir.getPath() + "'" + h5String + + ", molInfoFile = '" + molInfo.getPath() + "'" + + ", reportFile = '" + htmlFile.getPath() + "'" + + ", callFile = '" + callsFile.getPath() + "'" + + ", metricsFile = '" + metricsFile.getPath() + "'" + + ", rawCountsExport = '" + countFile.getPath() + "'" + ", cellbarcodeWhitelist = " + cellbarcodeWhitelist + ", barcodeWhitelist = " + allowableBarcodeParam + ", title = '" + parameters.getReportTitle() + "'" + @@ -1302,44 +1284,13 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St throw new PipelineJobException(e); } - File localBashScript = new File(outputDir, "generateCallsDockerWrapper.sh"); - try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript)) - { - writer.println("#!/bin/bash"); - writer.println("set -x"); - writer.println("WD=`pwd`"); - writer.println("HOME=`echo ~/`"); - - writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("sudo $DOCKER pull ghcr.io/bimberlab/cellhashr:latest"); - writer.println("sudo $DOCKER run --rm=true \\"); - if (SequencePipelineService.get().getMaxRam() != null) - { - writer.println("\t--memory=" + 
SequencePipelineService.get().getMaxRam() + "g \\"); - writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM \\"); - } - - if (SequencePipelineService.get().getMaxThreads(log) != null) - { - writer.println("\t-e SEQUENCEANALYSIS_MAX_THREADS \\"); - } + DockerWrapper wrapper = new DockerWrapper("ghcr.io/bimberlab/cellhashr:latest", ctx.getLogger(), ctx); + wrapper.addToDockerEnvironment("CELLHASHR_DEBUG", "1"); - writer.println("\t-e CELLHASHR_DEBUG=1 \\"); - writer.println("\t-v \"${WD}:/work\" \\"); - ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); - writer.println("\t-v \"${HOME}:/homeDir\" \\"); - writer.println("\t-u $UID \\"); - writer.println("\t-e USERID=$UID \\"); - writer.println("\t-w /work \\"); - writer.println("\tghcr.io/bimberlab/cellhashr:latest \\"); - writer.println("\tRscript --vanilla " + localRScript.getName()); - } - catch (IOException e) - { - throw new PipelineJobException(e); - } + PipelineStepOutput output = new DefaultPipelineStepOutput(); + wrapper.executeWithDocker(Arrays.asList("Rscript", "--vanilla", localRScript.getPath()), ctx.getWorkingDirectory(), output, inputFiles); + output.getIntermediateFiles().forEach(File::delete); - rWrapper.execute(Arrays.asList("/bin/bash", localBashScript.getName())); if (!htmlFile.exists()) { throw new PipelineJobException("Unable to find HTML file: " + htmlFile.getPath()); @@ -1409,29 +1360,8 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St throw new PipelineJobException("Unable to find HTO calls file: " + callsFile.getPath()); } - localBashScript.delete(); localRScript.delete(); - try - { - for (File f : toDelete) - { - log.debug("deleting local copy: " + f.getPath()); - if (f.isDirectory()) - { - FileUtils.deleteDirectory(f); - } - else - { - f.delete(); - } - } - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - return callsFile; } diff --git a/singlecell/src/org/labkey/singlecell/SingleCellModule.java 
b/singlecell/src/org/labkey/singlecell/SingleCellModule.java index 7749acfd1..922c93ee9 100644 --- a/singlecell/src/org/labkey/singlecell/SingleCellModule.java +++ b/singlecell/src/org/labkey/singlecell/SingleCellModule.java @@ -90,7 +90,6 @@ import org.labkey.singlecell.pipeline.singlecell.RunSDA; import org.labkey.singlecell.pipeline.singlecell.RunScGate; import org.labkey.singlecell.pipeline.singlecell.RunScGateBuiltin; -import org.labkey.singlecell.pipeline.singlecell.RunScMetabolism; import org.labkey.singlecell.pipeline.singlecell.RunSingleR; import org.labkey.singlecell.pipeline.singlecell.RunVision; import org.labkey.singlecell.pipeline.singlecell.ScoreCellCycle; @@ -284,7 +283,6 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new RunLDA.Provider()); SequencePipelineService.get().registerPipelineStep(new FilterDisallowedClasses.Provider()); SequencePipelineService.get().registerPipelineStep(new SummarizeTCellActivation.Provider()); - SequencePipelineService.get().registerPipelineStep(new RunScMetabolism.Provider()); SequencePipelineService.get().registerPipelineStep(new ScoreCellCycle.Provider()); SequencePipelineService.get().registerPipelineStep(new TrainScTour.Provider()); SequencePipelineService.get().registerPipelineStep(new PredictScTour.Provider()); diff --git a/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java b/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java index 628967c06..5c63eed6f 100644 --- a/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java +++ b/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java @@ -139,7 +139,7 @@ public boolean doRunLocal() public Collection getAllowableActionNames() { Set allowableNames = new HashSet<>(); - for (PipelineStepProvider provider: SequencePipelineService.get().getProviders(SingleCellStep.class)) + for (PipelineStepProvider provider: 
SequencePipelineService.get().getProviders(SingleCellStep.class)) { allowableNames.add(provider.getLabel()); } @@ -603,8 +603,8 @@ else if (step.createsSeuratObjects()) ctx.getJob().setStatus(PipelineJob.TaskStatus.running, "Creating Final HTML Report"); File finalHtml = new File(ctx.getOutputDir(), "finalHtml.html"); List lines = new ArrayList<>(); - lines.add("rmarkdown::render(output_file = '" + finalHtml.getName() + "', input = '" + finalMarkdownFile.getName() + "', intermediates_dir = '/work')"); - AbstractSingleCellPipelineStep.executeR(ctx, AbstractCellMembraneStep.CONTAINER_NAME, "pandoc", lines, null, null); + lines.add("rmarkdown::render(output_file = '" + finalHtml.getName() + "', input = '" + finalMarkdownFile.getName() + "', intermediates_dir = '" + ctx.getWorkingDirectory() + "')"); + AbstractSingleCellPipelineStep.executeR(ctx, AbstractCellMembraneStep.CONTAINER_NAME, "pandoc", lines, null, null, null); _resumer.getFileManager().addIntermediateFile(finalMarkdownFile); _resumer.getFileManager().addIntermediateFiles(_resumer.getMarkdownsInOrder()); _resumer.getFileManager().addIntermediateFiles(_resumer.getHtmlFilesInOrder()); @@ -630,7 +630,7 @@ else if (step.createsSeuratObjects()) Integer id = NumberUtils.createInteger(output.getDatasetId()); if (!inputMap.containsKey(id)) { - ctx.getLogger().warn("No input found matching dataset Id: " + output.getDatasetId()); + ctx.getLogger().warn("No input found matching dataset Id: {}", output.getDatasetId()); } else { @@ -641,7 +641,7 @@ else if (step.createsSeuratObjects()) } catch (NumberFormatException e) { - ctx.getLogger().error("Expected dataset ID to be an integer: " + output.getDatasetId()); + ctx.getLogger().error("Expected dataset ID to be an integer: {}", output.getDatasetId()); } } else diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PredictScTour.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PredictScTour.java index cc5740250..dd1d7af87 100644 --- 
a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PredictScTour.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PredictScTour.java @@ -12,7 +12,10 @@ import java.io.File; import java.io.IOException; import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; import java.util.List; +import java.util.Set; public class PredictScTour extends AbstractRiraStep { @@ -51,25 +54,22 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List getAdditionalDockerInputs(SequenceOutputHandler.JobContext ctx) throws PipelineJobException + { + Integer fileId = getProvider().getParameterByName("modelFileId").extractValue(ctx.getJob(), getProvider(), getStepIdx(), Integer.class); + if (fileId == null) + { + throw new PipelineJobException("Missing value for modelFileId param"); + } + + return Collections.singleton(ctx.getSequenceSupport().getCachedData(fileId)); + } } diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunCelltypistCustomModel.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunCelltypistCustomModel.java index c1769415a..795664b89 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunCelltypistCustomModel.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunCelltypistCustomModel.java @@ -1,6 +1,5 @@ package org.labkey.singlecell.pipeline.singlecell; -import org.apache.commons.io.FileUtils; import org.json.JSONObject; import org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider; @@ -12,8 +11,9 @@ import org.labkey.api.util.PageFlowUtil; import java.io.File; -import java.io.IOException; import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; import java.util.List; public class RunCelltypistCustomModel extends AbstractRiraStep @@ -94,25 +94,22 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List 
getAdditionalDockerInputs(SequenceOutputHandler.JobContext ctx) throws PipelineJobException + { + Integer fileId = getProvider().getParameterByName("modelFileId").extractValue(ctx.getJob(), getProvider(), getStepIdx(), Integer.class); + if (fileId == null) + { + throw new PipelineJobException("Missing value for modelFileId param"); + } + + return Collections.singleton(ctx.getSequenceSupport().getCachedData(fileId)); + } } diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunScMetabolism.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunScMetabolism.java deleted file mode 100644 index 80e0fc4a3..000000000 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunScMetabolism.java +++ /dev/null @@ -1,53 +0,0 @@ -package org.labkey.singlecell.pipeline.singlecell; - -import org.json.JSONObject; -import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider; -import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; -import org.labkey.api.singlecell.pipeline.SeuratToolParameter; -import org.labkey.api.singlecell.pipeline.SingleCellStep; - -import java.util.List; - -public class RunScMetabolism extends AbstractCellMembraneStep -{ - public RunScMetabolism(PipelineContext ctx, RunScMetabolism.Provider provider) - { - super(provider, ctx); - } - - public static class Provider extends AbstractPipelineStepProvider - { - public Provider() - { - super("RunScMetabolism", "scMetabolism", "scMetabolism", "This will run scMetabolism to score enrichment of metabolic pathways.", List.of( - SeuratToolParameter.create("metabolismTypes", "Metabolism Type(s)", "The databases to use", "ldk-simplecombo", new JSONObject() - {{ - put("multiSelect", true); - put("allowBlank", false); - put("storeValues", "KEGG;REACTOME"); - put("initialValues", "KEGG;REACTOME"); - put("delimiter", ";"); - put("joinReturnValue", true); - }}, "KEGG;REACTOME", null, true, true).delimiter(";") - ), null, null); - } - - @Override - public 
RunScMetabolism create(PipelineContext ctx) - { - return new RunScMetabolism(ctx, this); - } - } - - @Override - public boolean createsSeuratObjects() - { - return true; - } - - @Override - public String getFileSuffix() - { - return "scMetabolism"; - } -} diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/TrainCelltypist.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/TrainCelltypist.java index f4dfecc84..c98fd04c1 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/TrainCelltypist.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/TrainCelltypist.java @@ -134,7 +134,7 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List chimericCallsRecovered = new HashMap<>(); int restoredTRDVAV = 0; + final Map headerIdx = new HashMap<>(); int lineIdx = 0; while ((line = reader.readLine()) != null) { @@ -873,6 +876,8 @@ private static void processCSV(PrintWriter writer, boolean printHeader, File inp writer.println(line + ",chain_type"); } + String[] header = line.split(","); + IntStream.range(0, header.length).forEach(idx -> headerIdx.put(header[idx], idx)); continue; } @@ -880,18 +885,23 @@ private static void processCSV(PrintWriter writer, boolean printHeader, File inp String[] tokens = line.split(",", -1); // -1 used to preserve trailing empty strings // Restore original value for TRD/TRA - if (tokens[6].contains("TRDV") && tokens[6].contains("/") && tokens[6].contains("AV")) + final int vGeneIdx = headerIdx.get("v_gene"); + final int jGeneIdx = headerIdx.get("j_gene"); + final int cGeneIdx = headerIdx.get("c_gene"); + final int chainIdx = headerIdx.get("chain"); + + if (tokens[vGeneIdx].contains("TRDV") && tokens[vGeneIdx].contains("/") && tokens[vGeneIdx].contains("AV")) { restoredTRDVAV++; - String[] split = tokens[6].split("/"); - tokens[6] = "TR" + split[1] + "/" + split[0].replaceAll("TR", ""); + String[] split = tokens[vGeneIdx].split("/"); + tokens[vGeneIdx] = "TR" + 
split[1] + "/" + split[0].replaceAll("TR", ""); } List chains = new ArrayList<>(); String vGeneChain = null; String jGeneChain = null; String cGeneChain = null; - for (int idx : new Integer[]{6,8,9}) + for (int idx : new Integer[]{vGeneIdx,jGeneIdx,cGeneIdx}) { String val = StringUtils.trimToNull(tokens[idx]); if (val != null) @@ -899,15 +909,15 @@ private static void processCSV(PrintWriter writer, boolean printHeader, File inp val = val.substring(0, 3); chains.add(val); - if (idx == 6) + if (idx == vGeneIdx) { vGeneChain = val; } - if (idx == 8) + if (idx == jGeneIdx) { jGeneChain = val; } - else if (idx == 9) + else if (idx == cGeneIdx) { cGeneChain = val; } @@ -915,7 +925,7 @@ else if (idx == 9) } Set uniqueChains = new HashSet<>(chains); - String originalChain = StringUtils.trimToNull(tokens[5]); + String originalChain = StringUtils.trimToNull(tokens[chainIdx]); // Recover TRDV/TRAJ/TRAC: if (uniqueChains.size() > 1) @@ -925,7 +935,7 @@ else if (idx == 9) { uniqueChains.clear(); uniqueChains.add(cGeneChain); - String key = originalChain + "->" + cGeneChain + " (based on C-GENE)"; + String key = vGeneChain + ":" + jGeneChain + ":" + originalChain + "->" + cGeneChain + " (based on C-GENE)"; chimericCallsRecovered.put(key, chimericCallsRecovered.getOrDefault(key, 0) + 1); } else if (uniqueChains.size() == 2) @@ -950,14 +960,14 @@ else if (uniqueChains.size() == 2) if (uniqueChains.size() == 1) { String chain = uniqueChains.iterator().next(); - tokens[5] = chain; + tokens[chainIdx] = chain; } else { - log.info("Multiple chains detected [" + StringUtils.join(chains, ",")+ "], leaving original call alone: " + originalChain + ". " + tokens[6] + "/" + tokens[8] + "/" + tokens[9]); + log.info("Multiple chains detected [" + StringUtils.join(chains, ",")+ "], leaving original call alone: " + originalChain + ". 
" + tokens[vGeneIdx] + "/" + tokens[jGeneIdx] + "/" + tokens[cGeneIdx]); } - if (acceptableChains.contains(tokens[5])) + if (acceptableChains.contains(tokens[chainIdx])) { writer.println(StringUtils.join(tokens, ",") + "," + chainType); } diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index 3c2268aa4..fd6fef01d 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -27,7 +27,7 @@ import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider; import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; -import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; +import org.labkey.api.sequenceanalysis.run.DockerWrapper; import org.labkey.api.util.PageFlowUtil; import org.labkey.api.writer.PrintWriters; @@ -297,6 +297,8 @@ public void doNimbleAlign(File bam, PipelineStepOutput output, Readset rs, Strin { if (SequencePipelineService.get().hasMinLineCount(results, 2)) { + long lineCount = SequencePipelineService.get().getLineCount(results); + _ctx.getLogger().debug("Found {} lines in file {}", lineCount, results.getPath()); throw new PipelineJobException("Unable to find file: " + reportHtml.getPath()); } } @@ -309,11 +311,8 @@ public void doNimbleAlign(File bam, PipelineStepOutput output, Readset rs, Strin private File prepareReference(File genomeCsv, File genomeFasta, NimbleGenome genome, PipelineStepOutput output) throws PipelineJobException { - genomeCsv = ensureLocalCopy(genomeCsv, output); - genomeFasta = ensureLocalCopy(genomeFasta, output); - File nimbleJson = new File(getPipelineCtx().getWorkingDirectory(), genome.genomeId + ".json"); - runUsingDocker(Arrays.asList("python3", "-m", "nimble", "generate", "--opt-file", "/work/" + genomeFasta.getName(), "--file", "/work/" + genomeCsv.getName(), 
"--output_path", "/work/" + nimbleJson.getName()), output, "generate-" + genome.genomeId); + runUsingDocker(Arrays.asList("python3", "-m", "nimble", "generate", "--opt-file", genomeFasta.getPath(), "--file", genomeCsv.getPath(), "--output_path", nimbleJson.getPath()), output, "generate-" + genome.genomeId); if (!nimbleJson.exists()) { File doneFile = getNimbleDoneFile(getPipelineCtx().getWorkingDirectory(), "generate-" + genome.genomeId); @@ -410,20 +409,6 @@ private Map doAlignment(List genomes, List resultMap = new HashMap<>(); - File localBam = ensureLocalCopy(bam, output); - ensureLocalCopy(SequenceAnalysisService.get().getExpectedBamOrCramIndex(bam), output); - - List localRefJsons = refJsons.stream().map(refJson -> { - try - { - return ensureLocalCopy(refJson, output); - } - catch (PipelineJobException e) - { - throw new RuntimeException(e); - } - }).collect(Collectors.toList()); - List alignArgs = new ArrayList<>(); alignArgs.add("python3"); alignArgs.add("-m"); @@ -447,13 +432,32 @@ private Map doAlignment(List genomes, List "/work/" + x.getName()).collect(Collectors.joining(","))); + alignArgs.add(refJsons.stream().map(File::getPath).collect(Collectors.joining(","))); alignArgs.add("--output"); - alignArgs.add("/work/" + alignmentTsvBase.getName()); + alignArgs.add(alignmentTsvBase.getPath()); alignArgs.add("--input"); - alignArgs.add("/work/" + localBam.getName()); + alignArgs.add(bam.getPath()); + + // Create temp folder: + File tmpDir = new File(getPipelineCtx().getWorkingDirectory(), "tmpDir"); + if (tmpDir.exists()) + { + try + { + FileUtils.deleteDirectory(tmpDir); + Files.createDirectory(tmpDir.toPath()); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + output.addIntermediateFile(tmpDir); + + alignArgs.add("--tmpdir"); + alignArgs.add(tmpDir.getPath()); boolean dockerRan = runUsingDocker(alignArgs, output, "align.all"); for (NimbleGenome genome : genomes) @@ -492,16 +496,16 @@ public static File runNimbleReport(File 
alignResultsGz, int genomeId, PipelineSt reportArgs.add("report"); reportArgs.add("-i"); - reportArgs.add("/work/" + alignResultsGz.getName()); + reportArgs.add(alignResultsGz.getPath()); - File reportResultsGz = new File(ctx.getWorkingDirectory(), "reportResults." + genomeId + ".txt"); + File reportResultsGz = new File(ctx.getWorkingDirectory(), "reportResults." + genomeId + ".txt.gz"); if (reportResultsGz.exists()) { reportResultsGz.delete(); } reportArgs.add("-o"); - reportArgs.add("/work/" + reportResultsGz.getName()); + reportArgs.add(reportResultsGz.getPath()); runUsingDocker(reportArgs, output, null, ctx); @@ -520,7 +524,7 @@ public static File runNimbleReport(File alignResultsGz, int genomeId, PipelineSt plotArgs.add("plot"); plotArgs.add("--input_file"); - plotArgs.add("/work/" + alignResultsGz.getName()); + plotArgs.add(alignResultsGz.getPath()); File plotResultsHtml = getReportHtmlFileFromResults(reportResultsGz); if (plotResultsHtml.exists()) @@ -529,7 +533,7 @@ public static File runNimbleReport(File alignResultsGz, int genomeId, PipelineSt } plotArgs.add("--output_file"); - plotArgs.add("/work/" + plotResultsHtml.getName()); + plotArgs.add(plotResultsHtml.getPath()); runUsingDocker(plotArgs, output, null, ctx); @@ -565,73 +569,13 @@ private boolean runUsingDocker(List nimbleArgs, PipelineStepOutput outpu private static boolean runUsingDocker(List nimbleArgs, PipelineStepOutput output, @Nullable String resumeString, PipelineContext ctx) throws PipelineJobException { - File localBashScript = new File(ctx.getWorkingDirectory(), "docker.sh"); - File dockerBashScript = new File(ctx.getWorkingDirectory(), "dockerRun.sh"); - output.addIntermediateFile(localBashScript); - output.addIntermediateFile(dockerBashScript); - - // Create temp folder: - File tmpDir = new File(ctx.getWorkingDirectory(), "tmpDir"); - if (tmpDir.exists()) - { - try - { - FileUtils.deleteDirectory(tmpDir); - Files.createDirectory(tmpDir.toPath()); - } - catch (IOException e) - { - throw 
new PipelineJobException(e); - } - } - output.addIntermediateFile(tmpDir); + DockerWrapper wrapper = new DockerWrapper(DOCKER_CONTAINER_NAME, ctx.getLogger(), ctx); + wrapper.setWorkingDir(ctx.getWorkingDirectory()); + wrapper.setEntryPoint("/bin/bash"); - try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript);PrintWriter dockerWriter = PrintWriters.getPrintWriter(dockerBashScript)) - { - writer.println("#!/bin/bash"); - writer.println("set -x"); - writer.println("WD=`pwd`"); - writer.println("HOME=`echo ~/`"); - - writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("sudo $DOCKER pull " + DOCKER_CONTAINER_NAME); - writer.println("sudo $DOCKER run --rm=true \\"); - - Integer maxRam = SequencePipelineService.get().getMaxRam(); - if (maxRam != null) - { - //int swap = 4*maxRam; - writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM=" + maxRam + " \\"); - writer.println("\t--memory='" + maxRam + "g' \\"); - } + wrapper.setTmpDir(null); - ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); - writer.println("\t-v \"${WD}:/work\" \\"); - writer.println("\t-v \"${HOME}:/homeDir\" \\"); - writer.println("\t-u $UID \\"); - writer.println("\t-e RUST_BACKTRACE=1 \\"); - writer.println("\t-e TMPDIR=/work/tmpDir \\"); - writer.println("\t-e USERID=$UID \\"); - writer.println("\t--entrypoint /bin/bash \\"); - writer.println("\t-w /work \\"); - writer.println("\t" + DOCKER_CONTAINER_NAME + " \\"); - writer.println("\t/work/" + dockerBashScript.getName()); - writer.println("EXIT_CODE=$?"); - writer.println("echo 'Docker run exit code: '$EXIT_CODE"); - writer.println("exit $EXIT_CODE"); - - dockerWriter.println("#!/bin/bash"); - dockerWriter.println("set -x"); - - dockerWriter.println(StringUtils.join(nimbleArgs, " ")); - dockerWriter.println("EXIT_CODE=$?"); - dockerWriter.println("echo 'Exit code: '$?"); - dockerWriter.println("exit $EXIT_CODE"); - } - catch (IOException e) - { - throw new 
PipelineJobException(e); - } + wrapper.addToDockerEnvironment("RUST_BACKTRACE", "1"); File doneFile = null; if (resumeString != null) @@ -650,9 +594,7 @@ private static boolean runUsingDocker(List nimbleArgs, PipelineStepOutpu } } - SimpleScriptWrapper rWrapper = new SimpleScriptWrapper(ctx.getLogger()); - rWrapper.setWorkingDir(ctx.getWorkingDirectory()); - rWrapper.execute(Arrays.asList("/bin/bash", localBashScript.getName())); + wrapper.executeWithDocker(nimbleArgs, ctx.getWorkingDirectory(), output); if (doneFile != null) { @@ -669,38 +611,6 @@ private static boolean runUsingDocker(List nimbleArgs, PipelineStepOutpu return true; } - private File ensureLocalCopy(File input, PipelineStepOutput output) throws PipelineJobException - { - return ensureLocalCopy(input, output, getPipelineCtx()); - } - - public static File ensureLocalCopy(File input, PipelineStepOutput output, PipelineContext ctx) throws PipelineJobException - { - try - { - if (ctx.getWorkingDirectory().equals(input.getParentFile())) - { - return input; - } - - File local = new File(ctx.getWorkingDirectory(), input.getName()); - if (!local.exists()) - { - ctx.getLogger().debug("Copying file locally: " + input.getPath()); - FileUtils.copyFile(input, local); - } - - output.addIntermediateFile(local); - - return local; - } - catch (IOException e) - { - throw new PipelineJobException(e); - } - } - - private static class NimbleGenome { private final int genomeId; @@ -714,7 +624,7 @@ public NimbleGenome(JSONArray arr, int maxHitsToReport) throws PipelineJobExcept { if (arr.length() < 3) { - throw new PipelineJobException("Improper genome: " + arr.toString()); + throw new PipelineJobException("Improper genome: " + arr); } genomeId = arr.getInt(0); @@ -759,7 +669,7 @@ public Integer getNumMismatches() private String getVersion(PipelineStepOutput output) throws PipelineJobException { List nimbleArgs = new ArrayList<>(); - nimbleArgs.add("/bin/bash -c 'python3 -m nimble -v' > /work/nimbleVersion.txt"); + 
nimbleArgs.add("/bin/bash -c 'python3 -m nimble -v' > nimbleVersion.txt"); runUsingDocker(nimbleArgs, output, null); @@ -769,7 +679,7 @@ private String getVersion(PipelineStepOutput output) throws PipelineJobException throw new PipelineJobException("Unable to find file: " + outFile.getPath()); } - String ret = null; + String ret; try { ret = StringUtils.trimToNull(Files.readString(outFile.toPath())); diff --git a/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java index 9228cd71e..c0e40daee 100644 --- a/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java +++ b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java @@ -115,8 +115,7 @@ public void processFilesRemote(List inputFiles, JobContext c } // This will update these files in-place: - File alignmentFileLocal = NimbleHelper.ensureLocalCopy(alignmentFile, output, ctx); - File reportFile = NimbleHelper.runNimbleReport(alignmentFileLocal, so.getLibrary_id(), output, ctx); + File reportFile = NimbleHelper.runNimbleReport(alignmentFile, so.getLibrary_id(), output, ctx); if (!reportFile.exists()) { throw new PipelineJobException("Unable to find file: " + reportFile.getPath());