diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/JobResourceSettings.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/JobResourceSettings.java
index 2aa188ed2..2f7a9f122 100644
--- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/JobResourceSettings.java
+++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/JobResourceSettings.java
@@ -2,6 +2,7 @@
 
 import org.labkey.api.data.Container;
 
+import java.util.Collection;
 import java.util.List;
 
 /**
@@ -12,4 +13,6 @@ public interface JobResourceSettings
     boolean isAvailable(Container c);
 
     List<ToolParameterDescriptor> getParams();
+
+    Collection<String> getDockerVolumes(Container c);
 }
diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PipelineContext.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PipelineContext.java
index 506cb6ea4..af718ce46 100644
--- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PipelineContext.java
+++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PipelineContext.java
@@ -16,11 +16,11 @@ package org.labkey.api.sequenceanalysis.pipeline;
 
 import org.apache.logging.log4j.Logger;
-import org.apache.logging.log4j.LogManager;
 import org.labkey.api.pipeline.PipelineJob;
 import org.labkey.api.pipeline.WorkDirectory;
 
 import java.io.File;
+import java.util.Collection;
 
 /**
  * User: bimber
@@ -51,4 +51,6 @@ public interface PipelineContext
      * This is the directory where the source files were located. In the situation where this is a split job, forceParent=true will return the parent job's sourceDirectory. This can be important if files are written here prior to split.
      */
     File getSourceDirectory(boolean forceParent);
+
+    Collection<String> getDockerVolumes();
 }
diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java
index f2e52eb19..64097cd84 100644
--- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java
+++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java
@@ -19,6 +19,7 @@
 import htsjdk.samtools.SAMFileHeader;
 import org.apache.logging.log4j.Logger;
 import org.jetbrains.annotations.Nullable;
+import org.labkey.api.data.Container;
 import org.labkey.api.pipeline.PipelineJob;
 import org.labkey.api.pipeline.PipelineJobException;
 import org.labkey.api.sequenceanalysis.SequenceOutputFile;
@@ -26,6 +27,7 @@
 
 import java.io.File;
 import java.io.IOException;
+import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -98,6 +100,8 @@ static public void setInstance(SequencePipelineService instance)
      */
     abstract public String getDockerCommand();
 
+    abstract public Collection<String> getDockerVolumes(Container c);
+
    abstract public List<File> getSequenceJobInputFiles(PipelineJob job);
 
     /**
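Taken together, the three API changes above let a JobResourceSettings implementation advertise host paths that must be bind-mounted into any docker container the pipeline launches, with PipelineContext exposing the resolved list to running jobs. A minimal sketch of an implementing class (the class name and the /mnt/scratch path are hypothetical, for illustration only):

import java.util.Collection;
import java.util.Collections;
import java.util.List;

import org.labkey.api.data.Container;
import org.labkey.api.sequenceanalysis.pipeline.JobResourceSettings;
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;

public class ScratchVolumeResourceSettings implements JobResourceSettings
{
    @Override
    public boolean isAvailable(Container c)
    {
        return true; // illustrative: offer this volume for every container
    }

    @Override
    public List<ToolParameterDescriptor> getParams()
    {
        return Collections.emptyList();
    }

    @Override
    public Collection<String> getDockerVolumes(Container c)
    {
        // each entry is a host path; the service layer formats it into a -v bind mount
        return Collections.singleton("/mnt/scratch");
    }
}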
diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java
index 245274230..12b2158fa 100644
--- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java
+++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java
@@ -4,6 +4,7 @@
 import org.apache.commons.lang3.StringUtils;
 import org.apache.logging.log4j.Logger;
 import org.labkey.api.pipeline.PipelineJobException;
+import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
 import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker;
 import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
 import org.labkey.api.writer.PrintWriters;
@@ -17,12 +18,14 @@ public class DockerWrapper extends AbstractCommandWrapper
 {
     private final String _containerName;
+    private final PipelineContext _ctx;
     private File _tmpDir = null;
 
-    public DockerWrapper(String containerName, Logger log)
+    public DockerWrapper(String containerName, Logger log, PipelineContext ctx)
     {
         super(log);
         _containerName = containerName;
+        _ctx = ctx;
     }
 
     public void setTmpDir(File tmpDir)
@@ -49,6 +52,7 @@ public void executeWithDocker(List<String> containerArgs, File workDir, PipelineOutputTracker output)
         writer.println("sudo $DOCKER run --rm=true \\");
         writer.println("\t-v \"${WD}:/work\" \\");
         writer.println("\t-v \"${HOME}:/homeDir\" \\");
+        _ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\"));
         if (_tmpDir != null)
         {
             writer.println("\t-v \"" + _tmpDir.getPath() + ":/tmp\" \\");
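DockerWrapper now takes the PipelineContext at construction, so every generated wrapper script can mount the context's volumes alongside the standard ${WD} and ${HOME} mounts. A sketch of the updated call pattern, assuming a JobContext ctx is in scope as in the handlers later in this diff (the container name and tmp dir are illustrative):

// inside a handler's processFilesRemote(), where ctx is the JobContext:
DockerWrapper wrapper = new DockerWrapper("ghcr.io/example/mytool:latest", ctx.getLogger(), ctx);
wrapper.setTmpDir(new File(ctx.getWorkingDirectory(), "dockerTmp"));
// the script written by executeWithDocker() will now contain one
// "-v 'path':'path' \" line per entry returned by ctx.getDockerVolumes()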
diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh
index d7162cdfe..a04deedb8 100755
--- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh
+++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh
@@ -1,37 +1,5 @@
 #!/bin/bash
-#
-#
-# Copyright (c) 2012 LabKey Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# This script is designed to assist with the initial installation of the external informatics tools
-# used by the SequenceAnalysis module pipeline. It is a fairly utilitarian script, which has undergone only
-# very limited testing. It is designed to provide a template to assist with the install, not a one-click installer.
-# This script was written for RHEL6, and has been adapted for Ubuntu.
-#
-# Prior to using this script, you should run and configure CPAN and the package manager for your OS (ie. yum or apt-get).
-# This script is designed to be run as root, using a command similar to:
-#
-#   bash sequence_tools_install.sh -d /usr/local/labkey/ | tee sequence_tools_install.log
-#
-# NOTE: this script will delete any previously downloaded versions of these tools, assuming they were downloaded to the location
-# expected by this script. This is deliberate so that the script can be re-run to perform incremental upgrades of these tools.
-#
-#
-# Variables
-#
+
 set -e
 set -u
 FORCE_REINSTALL=
@@ -85,287 +53,165 @@
 echo "LKTOOLS_DIR: $LKTOOLS_DIR"
 echo "LKSRC_DIR: $LKSRC_DIR"
 
-#
-#MORGAN
-#
 echo ""
 echo ""
 echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
-echo "Install MORGAN"
+echo "Install plink2"
 echo ""
 cd $LKSRC_DIR
 
-if [[ ! -e ${LKTOOLS_DIR}/MORGAN || ! -z $FORCE_REINSTALL ]];
+if [[ ! -e ${LKTOOLS_DIR}/plink2 || ! -z $FORCE_REINSTALL ]];
 then
     echo "Cleaning up previous installs"
-    rm -Rf morgan32_release*
-    rm -Rf morgan_v3*
-    rm -Rf MORGAN_V*
-    rm -Rf $LKTOOLS_DIR/MORGAN
-
-    wget http://faculty.washington.edu/eathomp/Anonftp/PANGAEA/MORGAN/morgan_v332_release.tar.gz
-    gunzip morgan_v332_release.tar.gz
-    tar -xf morgan_v332_release.tar
-    echo "Compressing TAR"
-    gzip morgan_v332_release.tar
-    cd MORGAN_V332_Release
-    make morgan
+    rm -Rf plink2*
+    rm -Rf $LKTOOLS_DIR/plink2*
 
-    cd ../
-    cp -R ./MORGAN_V332_Release $LKTOOLS_DIR/MORGAN
+    wget https://s3.amazonaws.com/plink2-assets/alpha6/plink2_linux_avx2_20241111.zip
+    unzip plink2_linux_avx2_20241111.zip
+
+    install ./plink2 $LKTOOLS_DIR/plink2
 else
     echo "Already installed"
 fi
 
+echo ""
+echo ""
+echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
+echo "Install minimap2"
+echo ""
+cd $LKSRC_DIR
+
+if [[ ! -e ${LKTOOLS_DIR}/minimap2 || ! -z $FORCE_REINSTALL ]];
+then
+    echo "Cleaning up previous installs"
+    rm -Rf minimap2*
+    rm -Rf $LKTOOLS_DIR/minimap2*
+
+    wget https://github.com/lh3/minimap2/releases/download/v2.28/minimap2-2.28.tar.bz2
+    bunzip2 minimap2-2.28.tar.bz2
+    tar -xf minimap2-2.28.tar
+
+    cd minimap2-2.28
+    make
+
+    install minimap2 $LKTOOLS_DIR/
+else
+    echo "Already installed"
+fi
 
-#
-#GIGI
-#
 echo ""
 echo ""
 echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
-echo "Install GIGI"
+echo "Install cellsnp-lite"
 echo ""
 cd $LKSRC_DIR
 
-if [[ ! -e ${LKTOOLS_DIR}/GIGI || ! -z $FORCE_REINSTALL ]];
+if [[ ! -e ${LKTOOLS_DIR}/cellsnp-lite || ! -z $FORCE_REINSTALL ]];
 then
     echo "Cleaning up previous installs"
-    rm -Rf GIGI_v1.06.1*
-    rm -Rf $LKTOOLS_DIR/GIGI
-    rm -Rf __MACOSX*
+    rm -Rf cellsnp-lite*
+    rm -Rf $LKTOOLS_DIR/cellsnp-lite*
 
-    wget https://faculty.washington.edu/wijsman/progdists/gigi/software/GIGI/GIGI_v1.06.1.zip
-    unzip GIGI_v1.06.1.zip
-    cd GIGI_v1.06.1
+    git clone https://github.com/single-cell-genetics/cellsnp-lite.git
+    cd cellsnp-lite
+    autoreconf -iv
+    ./configure --with-htslib=${LKTOOLS_DIR}/lib
     make
 
-    cd ../
-    install ./GIGI_v1.06.1/GIGI $LKTOOLS_DIR/GIGI
+    install cellsnp-lite $LKTOOLS_DIR/
+else
+    echo "Already installed"
+fi
+
+
+echo ""
+echo ""
+echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
+echo "Install sratoolkit"
+echo ""
+cd $LKSRC_DIR
+
+if [[ ! -e ${LKTOOLS_DIR}/fasterq-dump || ! -z $FORCE_REINSTALL ]];
+then
+    echo "Cleaning up previous installs"
+    rm -Rf sratoolkit*
+    rm -Rf $LKTOOLS_DIR/sratoolkit*
+    rm -Rf $LKTOOLS_DIR/fasterq-dump*
+
+    wget https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/current/sratoolkit.current-centos_linux64.tar.gz
+    tar -xf sratoolkit.current-centos_linux64.tar.gz
+    cp -R sratoolkit.3.1.1-centos_linux64 $LKTOOLS_DIR
+    ln -s ${LKTOOLS_DIR}/sratoolkit.3.1.1-centos_linux64/bin/fasterq-dump ${LKTOOLS_DIR}/fasterq-dump
 else
     echo "Already installed"
 fi
 
-#
-# PARalyzer
-#
 echo ""
 echo ""
 echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
-echo "Install PARalyzer"
+echo "Install gffread"
 echo ""
 cd $LKSRC_DIR
 
-if [[ ! -e ${LKTOOLS_DIR}/PARalyzer || ! -z $FORCE_REINSTALL ]];
+if [[ ! -e ${LKTOOLS_DIR}/gffread || ! -z $FORCE_REINSTALL ]];
 then
     echo "Cleaning up previous installs"
-    rm -Rf PARalyzer_v1_5*
-    rm -Rf $LKTOOLS_DIR/PARalyzer
+    rm -Rf gffread*
+    rm -Rf $LKTOOLS_DIR/gffread*
 
-    wget --no-check-certificate https://ohlerlab.mdc-berlin.de/files/duke/PARalyzer/PARalyzer_v1_5.tar.gz
-    gunzip PARalyzer_v1_5.tar.gz
-    tar -xf PARalyzer_v1_5.tar
-    gzip PARalyzer_v1_5.tar
+    wget https://github.com/gpertea/gffread/releases/download/v0.12.7/gffread-0.12.7.Linux_x86_64.tar.gz
+    tar -xf gffread-0.12.7.Linux_x86_64.tar.gz
 
-    install ./PARalyzer_v1_5/PARalyzer $LKTOOLS_DIR/PARalyzer
+    install ./gffread-0.12.7.Linux_x86_64/gffread $LKTOOLS_DIR/
 else
     echo "Already installed"
 fi
 
-#
-# bwa
-#
 echo ""
 echo ""
 echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
-echo "Install miRDeep2"
+echo "Install paragraph"
 echo ""
 cd $LKSRC_DIR
 
-if [[ ! -e ${LKTOOLS_DIR}/miRDeep2 || ! -z $FORCE_REINSTALL ]];
+if [[ ! -e ${LKTOOLS_DIR}/paragraph || ! -z $FORCE_REINSTALL ]];
 then
     echo "Cleaning up previous installs"
-    rm -Rf mirdeep2_0_0_7*
-    rm -Rf $LKTOOLS_DIR/miRDeep2.pl
+    rm -Rf paragraph*
+    rm -Rf $LKTOOLS_DIR/paragraph*
 
-    wget --no-check-certificate https://www.mdc-berlin.de/43969303/en/research/research_teams/systems_biology_of_gene_regulatory_elements/projects/miRDeep/mirdeep2_0_0_7.zip
-    unzip mirdeep2_0_0_7.zip
+    mkdir paragraph
+    cd paragraph
+    wget https://github.com/Illumina/paragraph/releases/download/v2.4a/paragraph-v2.4a-binary.zip
+    unzip paragraph-v2.4a-binary.zip
+    rm paragraph-v2.4a-binary.zip
 
-    cp -R ./mirdeep2_0_0_7 $LKTOOLS_DIR/miRDeep2
+    cd ../
+    cp -R paragraph $LKTOOLS_DIR
+    ln -s ${LKTOOLS_DIR}/paragraph/bin/paragraph ${LKTOOLS_DIR}/paragraph
+    ln -s ${LKTOOLS_DIR}/paragraph/bin/multigrmpy.py ${LKTOOLS_DIR}/multigrmpy.py
 else
     echo "Already installed"
 fi
 
-##
-##Mira
-##
-#echo ""
-#echo ""
-#echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
-#echo "Install Mira Assembler"
-#echo ""
-#cd $LKSRC_DIR
-#
-#if [[ ! -e ${LKTOOLS_DIR}/mira || ! -z $FORCE_REINSTALL ]];
-#then
-#    echo "Cleaning up previous installs"
-#    rm -Rf mira_4.0rc4_linux-gnu_x86_64*
-#    rm -Rf mira_4.0.2_linux-gnu_x86_64*
-#    rm -Rf mira-4.0*
-#
-#    rm -Rf $LKTOOLS_DIR/mira
-#    rm -Rf $LKTOOLS_DIR/miraconvert
-#
-#    wget http://downloads.sourceforge.net/project/mira-assembler/MIRA/stable/mira_4.0.2_linux-gnu_x86_64_static.tar.bz2
-#    bunzip2 mira_4.0.2_linux-gnu_x86_64_static.tar.bz2
-#    tar -xf mira_4.0.2_linux-gnu_x86_64_static.tar
-#    echo "Compressing TAR"
-#    bzip2 mira_4.0.2_linux-gnu_x86_64_static.tar
-#    cd mira_4.0.2_linux-gnu_x86_64_static
-#
-#    cd $LKTOOLS_DIR
-#    ln -s ./src/mira_4.0.2_linux-gnu_x86_64_static/bin/mira mira
-#    ln -s ./src/mira_4.0.2_linux-gnu_x86_64_static/bin/miraconvert miraconvert
-#else
-#    echo "Already installed"
-#fi
-
-##
-##velvet
-##
-#
-#echo ""
-#echo ""
-#echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
-#echo "Install velvet"
-#echo ""
-#cd $LKSRC_DIR
-#
-#if [[ ! -e ${LKTOOLS_DIR}/velvetg || ! -z $FORCE_REINSTALL ]];
-#then
-#    echo "Cleaning up previous installs"
-#    rm -Rf velvet_1.2.09.tgz
-#    rm -Rf velvet_1.2.09.tar.gz
-#    rm -Rf velvet_1.2.09.tar
-#    rm -Rf velvet_1.2.09
-#    rm -Rf $LKTOOLS_DIR/velvetg
-#    rm -Rf $LKTOOLS_DIR/velveth
-#
-#    wget http://www.ebi.ac.uk/~zerbino/velvet/velvet_1.2.09.tgz
-#    gunzip velvet_1.2.09.tgz
-#    tar -xf velvet_1.2.09.tar
-#    echo "Compressing TAR"
-#    gzip velvet_1.2.09.tar
-#    cd velvet_1.2.09
-#    make OPENMP=1 LONGSEQUENCES=1
-#
-#    cd $LKTOOLS_DIR
-#    ln -s ./src/velvet_1.2.09/velvetg velvetg
-#    ln -s ./src/velvet_1.2.09/velveth velveth
-#else
-#    echo "Already installed"
-#fi
-
-##
-##VelvetOptimiser
-##
-#
-#echo ""
-#echo ""
-#echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
-#echo "Installing VelvetOptimiser"
-#echo ""
-#cd $LKSRC_DIR
-#
-#if [[ ! -e ${LKTOOLS_DIR}/VelvetOptimiser.pl || ! -z $FORCE_REINSTALL ]];
-#then
-#    rm -Rf VelvetOptimiser-2.2.5.tar.gz
-#    rm -Rf VelvetOptimiser-2.2.5.tar
-#    rm -Rf VelvetOptimiser-2.2.5
-#    rm -Rf $LKTOOLS_DIR/VelvetOptimiser.pl
-#
-#    wget http://www.vicbioinformatics.com/VelvetOptimiser-2.2.5.tar.gz
-#    gunzip VelvetOptimiser-2.2.5.tar.gz
-#    tar -xf VelvetOptimiser-2.2.5.tar
-#    gzip VelvetOptimiser-2.2.5.tar
-#    cd VelvetOptimiser-2.2.5
-#
-#    cd $LKTOOLS_DIR
-#    ln -s ./src/VelvetOptimiser-2.2.5/VelvetOptimiser.pl VelvetOptimiser.pl
-#else
-#    echo "Already installed"
-#fi
-
-##
-##AMOS
-##
-#
-#echo ""
-#echo ""
-#echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
-#echo "Installing AMOS"
-#echo ""
-#cd $LKSRC_DIR
-#
-#if [[ ! -e ${LKTOOLS_DIR}/bank-transact || ! -z $FORCE_REINSTALL ]];
-#then
-#    rm -Rf amos-3.1.0.tar.gz
-#    rm -Rf amos-3.1.0.tar
-#    rm -Rf amos-3.1.0
-#    rm -Rf $LKTOOLS_DIR/bank2fasta
-#    rm -Rf $LKTOOLS_DIR/bank2contig
-#    rm -Rf $LKTOOLS_DIR/bank-transact
-#
-#    wget http://downloads.sourceforge.net/project/amos/amos/3.1.0/amos-3.1.0.tar.gz
-#    gunzip amos-3.1.0.tar.gz
-#    tar -xf amos-3.1.0.tar
-#    cd amos-3.1.0
-#    ./configure
-#    make
-#    make install
-#
-#    cd $LKTOOLS_DIR
-#    ln -s ./src/amos-3.1.0/bin/bank2fasta bank2fasta
-#    ln -s ./src/amos-3.1.0/bin/bank2contig bank2contig
-#    ln -s ./src/amos-3.1.0/bin/bank-transact bank-transact
-#else
-#    echo "Already installed"
-#fi
-
-#
-# htseq
-#
-#echo ""
-#echo ""
-#echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
-#echo "Install htseq"
-#echo ""
-#cd $LKSRC_DIR
-#
-#if [[ ! -e ${LKTOOLS_DIR}/htseq || ! -z $FORCE_REINSTALL ]];
-#then
-#    echo "Cleaning up previous installs"
-#    rm -Rf STAR_2.4*
-#    rm -Rf $LKTOOLS_DIR/STAR
-#
-#    if [ -n $SKIP_PACKAGE_MANAGER ]; then
-#        echo "Skipping package install"
-#    elif [ $(which apt-get) ]; then
-#        apt-get install build-essential python2.7-dev python-numpy python-matplotlib
-#    elif [ $(which yum) ]; then
-#        yum install python-devel numpy python-matplotlib
-#    fi
-#
-#    wget https://pypi.python.org/packages/source/H/HTSeq/HTSeq-0.6.1.tar.gz
-#    gunzip HTSeq-0.6.1.tar.gz
-#    tar -xf HTSeq-0.6.1.tar
-#    gzip HTSeq-0.6.1.tar
-#
-#    cd HTSeq-0.6.1
-#    python setup.py install --user
-#else
-#    echo "Already installed"
-#fi
+echo ""
+echo ""
+echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
+echo "Install sniffles2"
+echo ""
+cd $LKSRC_DIR
+
+if [[ ! -e ${LKTOOLS_DIR}/sniffles || ! -z $FORCE_REINSTALL ]];
+then
+    echo "Cleaning up previous installs"
+    rm -Rf sniffles*
+    rm -Rf $LKTOOLS_DIR/sniffles*
+
+    module load python/3.11.7
+    python -m ensurepip --upgrade
+    python -m pip install --force-reinstall --target ${LKTOOLS_DIR}/pythonPackages git+https://github.com/fritzsedlazeck/Sniffles.git
+else
+    echo "Already installed"
+fi
diff --git a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh
index 7a8946901..cb5d78053 100755
--- a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh
+++ b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh
@@ -146,18 +146,49 @@ if [[ ! -e ${LKTOOLS_DIR}/bwa || ! -z $FORCE_REINSTALL ]];
 then
     echo "Cleaning up previous installs"
     rm -Rf bwa-0.*
+    rm -Rf bwa.zip
+    rm -Rf v0.7.18*
     rm -Rf $LKTOOLS_DIR/bwa
 
-    wget $WGET_OPTS -O bwa.zip https://github.com/lh3/bwa/zipball/master/
-    unzip bwa.zip
-    DIRNAME=`ls | grep lh3-bwa`
-    cd $DIRNAME
+    wget $WGET_OPTS https://github.com/lh3/bwa/archive/refs/tags/v0.7.18.tar.gz
+    tar -xf v0.7.18.tar.gz
+
+    cd bwa-0.7.18
     make
 
     install bwa $LKTOOLS_DIR/
 else
     echo "Already installed"
 fi
+
+#
+# bwa-mem2
+#
+echo ""
+echo ""
+echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
+echo "Install BWA-mem2"
+echo ""
+cd $LKSRC_DIR
+
+if [[ ! -e ${LKTOOLS_DIR}/bwa-mem2 || ! -z $FORCE_REINSTALL ]];
+then
+    echo "Cleaning up previous installs"
+    rm -Rf bwa-mem2*
+    rm -Rf $LKTOOLS_DIR/bwa-mem2*
+
+    wget $WGET_OPTS https://github.com/bwa-mem2/bwa-mem2/releases/download/v2.2.1/bwa-mem2-2.2.1_x64-linux.tar.bz2
+    bunzip2 bwa-mem2-2.2.1_x64-linux.tar.bz2
+    tar -xf bwa-mem2-2.2.1_x64-linux.tar
+
+    # NOTE: all executables are needed:
+    install bwa-mem2-2.2.1_x64-linux/bwa-mem2* $LKTOOLS_DIR/
+else
+    echo "Already installed"
+fi
+
+
 
 #
 # gffread
 #
@@ -274,10 +305,10 @@
 then
     rm -Rf gatk-4*
     rm -Rf $LKTOOLS_DIR/GenomeAnalysisTK4.jar
 
-    wget $WGET_OPTS https://github.com/broadinstitute/gatk/releases/download/4.4.0.0/gatk-4.4.0.0.zip
-    unzip gatk-4.4.0.0.zip
+    wget $WGET_OPTS https://github.com/broadinstitute/gatk/releases/download/4.6.1.0/gatk-4.6.1.0.zip
+    unzip gatk-4.6.1.0.zip
 
-    cp ./gatk-4.4.0.0/gatk-package-4.4.0.0-local.jar $LKTOOLS_DIR/GenomeAnalysisTK4.jar
+    cp ./gatk-4.6.1.0/gatk-package-4.6.1.0-local.jar $LKTOOLS_DIR/GenomeAnalysisTK4.jar
 else
     echo "Already installed"
 fi
@@ -298,14 +329,13 @@
 then
     echo "Cleaning up previous installs"
     rm -Rf STAR_2.*
     rm -Rf $LKTOOLS_DIR/STAR
+    rm -Rf $LKTOOLS_DIR/STARlong
 
-    wget $WGET_OPTS https://github.com/alexdobin/STAR/archive/2.7.10b.tar.gz
-    gunzip 2.7.10b.tar.gz
-    tar -xf 2.7.10b.tar
-    gzip 2.7.10b.tar
+    wget $WGET_OPTS https://github.com/alexdobin/STAR/releases/download/2.7.11b/STAR_2.7.11b.zip
+    unzip STAR_2.7.11b.zip
 
-    install ./STAR-2.7.10b/bin/Linux_x86_64_static/STAR $LKTOOLS_DIR/STAR
-    install ./STAR-2.7.10b/bin/Linux_x86_64_static/STARlong $LKTOOLS_DIR/STARlong
+    install ./STAR_2.7.11b/Linux_x86_64/STAR $LKTOOLS_DIR/STAR
+    install ./STAR_2.7.11b/Linux_x86_64/STARlong $LKTOOLS_DIR/STARlong
 else
     echo "Already installed"
 fi
@@ -433,12 +463,13 @@
 then
     rm -Rf $LKTOOLS_DIR/samtools
     rm -Rf $LKTOOLS_DIR/bcftools
 
-    wget $WGET_OPTS https://github.com/samtools/samtools/releases/download/1.20/samtools-1.20.tar.bz2
-    bunzip2 samtools-1.20.tar.bz2
-    tar -xf samtools-1.20.tar
+    ST_VERSION=1.21
+    wget $WGET_OPTS https://github.com/samtools/samtools/releases/download/${ST_VERSION}/samtools-${ST_VERSION}.tar.bz2
+    bunzip2 samtools-${ST_VERSION}.tar.bz2
+    tar -xf samtools-${ST_VERSION}.tar
     echo "Compressing TAR"
-    bzip2 samtools-1.20.tar
-    cd samtools-1.20
+    bzip2 samtools-${ST_VERSION}.tar
+    cd samtools-${ST_VERSION}
     ./configure
     make
 
     install ./samtools ${LKTOOLS_DIR}/samtools
@@ -462,10 +493,11 @@
 then
     rm -Rf bcftools*
     rm -Rf $LKTOOLS_DIR/bcftools
 
-    wget $WGET_OPTS https://github.com/samtools/bcftools/releases/download/1.20/bcftools-1.20.tar.bz2
-    tar xjvf bcftools-1.20.tar.bz2
-    chmod 755 bcftools-1.20
-    cd bcftools-1.20
+    ST_VERSION=1.21
+    wget $WGET_OPTS https://github.com/samtools/bcftools/releases/download/${ST_VERSION}/bcftools-${ST_VERSION}.tar.bz2
+    tar xjvf bcftools-${ST_VERSION}.tar.bz2
+    chmod 755 bcftools-${ST_VERSION}
+    cd bcftools-${ST_VERSION}
 
     rm -f plugins/liftover.c
     wget $WGET_OPTS -P plugins https://raw.githubusercontent.com/freeseek/score/master/liftover.c
@@ -495,15 +527,17 @@
 then
     rm -Rf $LKTOOLS_DIR/tabix
     rm -Rf $LKTOOLS_DIR/bgzip
 
-    wget $WGET_OPTS https://github.com/samtools/htslib/releases/download/1.20/htslib-1.20.tar.bz2
-    bunzip2 htslib-1.20.tar.bz2
-    tar -xf htslib-1.20.tar
+    ST_VERSION=1.21
+    wget $WGET_OPTS https://github.com/samtools/htslib/releases/download/${ST_VERSION}/htslib-${ST_VERSION}.tar.bz2
+    bunzip2 htslib-${ST_VERSION}.tar.bz2
+    tar -xf htslib-${ST_VERSION}.tar
     echo "Compressing TAR"
-    bzip2 htslib-1.20.tar
-    chmod 755 htslib-1.20
-    cd htslib-1.20
-    ./configure
+    bzip2 htslib-${ST_VERSION}.tar
+    chmod 755 htslib-${ST_VERSION}
+    cd htslib-${ST_VERSION}
+    ./configure --prefix=${LKTOOLS_DIR}/lib
     make
+    make install
 
     install ./tabix $LKTOOLS_DIR
     install ./bgzip $LKTOOLS_DIR
@@ -528,7 +562,7 @@
 then
     rm -Rf bedtools*
     rm -Rf $LKTOOLS_DIR/bedtools
 
-    wget -O bedtools $WGET_OPTS https://github.com/arq5x/bedtools2/releases/download/v2.30.0/bedtools.static.binary
+    wget -O bedtools $WGET_OPTS https://github.com/arq5x/bedtools2/releases/download/v2.31.0/bedtools.static
     chmod +x bedtools
 
     install ./bedtools ${LKTOOLS_DIR}/bedtools
@@ -645,7 +679,7 @@
 then
     rm -Rf $LKTOOLS_DIR/htsjdk-*
     rm -Rf $LKTOOLS_DIR/libIntelDeflater.so
 
-    wget $WGET_OPTS https://github.com/broadinstitute/picard/releases/download/3.0.0/picard.jar
+    wget $WGET_OPTS https://github.com/broadinstitute/picard/releases/download/3.3.0/picard.jar
 
     cp -R ./picard.jar $LKTOOLS_DIR/
 else
@@ -1063,8 +1097,8 @@
 if [[ ! -e ${LKTOOLS_DIR}/lofreq || ! -z $FORCE_REINSTALL ]];
 then
-    rm -Rf lofreq_star*
-    rm -Rf $LKTOOLS_DIR/lofreq_star*
+    rm -Rf lofreq*
+    rm -Rf $LKTOOLS_DIR/lofreq*
 
     wget $WGET_OPTS https://github.com/CSB5/lofreq/raw/master/dist/lofreq_star-2.1.4_linux-x86-64.tgz
     tar -xf lofreq_star-2.1.4_linux-x86-64.tgz
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java
index 9b4cfb4c5..338bf6939 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java
@@ -67,10 +67,31 @@
 import org.labkey.sequenceanalysis.button.DownloadSraButton;
 import org.labkey.sequenceanalysis.button.ReprocessLibraryButton;
 import org.labkey.sequenceanalysis.button.RunMultiQCButton;
-import org.labkey.sequenceanalysis.pipeline.*;
+import org.labkey.sequenceanalysis.pipeline.AlignmentAnalysisJob;
+import org.labkey.sequenceanalysis.pipeline.AlignmentImportJob;
+import org.labkey.sequenceanalysis.pipeline.CacheGenomePipelineJob;
+import org.labkey.sequenceanalysis.pipeline.CacheGenomeTrigger;
+import org.labkey.sequenceanalysis.pipeline.ConvertToCramHandler;
+import org.labkey.sequenceanalysis.pipeline.IlluminaImportJob;
+import org.labkey.sequenceanalysis.pipeline.ImportFastaSequencesPipelineJob;
+import org.labkey.sequenceanalysis.pipeline.ImportGenomeTrackPipelineJob;
+import org.labkey.sequenceanalysis.pipeline.OrphanFilePipelineProvider;
+import org.labkey.sequenceanalysis.pipeline.ProcessVariantsHandler;
+import org.labkey.sequenceanalysis.pipeline.ReadsetImportJob;
+import org.labkey.sequenceanalysis.pipeline.ReblockGvcfHandler;
+import org.labkey.sequenceanalysis.pipeline.ReferenceLibraryPipelineProvider;
+import org.labkey.sequenceanalysis.pipeline.SequenceAlignmentJob;
+import org.labkey.sequenceanalysis.pipeline.SequenceAlignmentTask;
+import org.labkey.sequenceanalysis.pipeline.SequenceJob;
+import org.labkey.sequenceanalysis.pipeline.SequenceJobSupportImpl;
+import org.labkey.sequenceanalysis.pipeline.SequenceOutputHandlerPipelineProvider;
+import org.labkey.sequenceanalysis.pipeline.SequencePipelineProvider;
+import org.labkey.sequenceanalysis.pipeline.SequenceReadsetHandlerPipelineProvider;
+import org.labkey.sequenceanalysis.pipeline.VariantProcessingJob;
 import org.labkey.sequenceanalysis.query.SequenceAnalysisUserSchema;
 import org.labkey.sequenceanalysis.query.SequenceTriggerHelper;
 import org.labkey.sequenceanalysis.run.RestoreSraDataHandler;
+import org.labkey.sequenceanalysis.run.alignment.BWAMem2Wrapper;
 import org.labkey.sequenceanalysis.run.alignment.BWAMemWrapper;
 import org.labkey.sequenceanalysis.run.alignment.BWASWWrapper;
 import org.labkey.sequenceanalysis.run.alignment.BWAWrapper;
@@ -82,7 +103,27 @@
 import org.labkey.sequenceanalysis.run.alignment.Pbmm2Wrapper;
 import org.labkey.sequenceanalysis.run.alignment.StarWrapper;
 import org.labkey.sequenceanalysis.run.alignment.VulcanWrapper;
-import org.labkey.sequenceanalysis.run.analysis.*;
+import org.labkey.sequenceanalysis.run.analysis.BamIterator;
+import org.labkey.sequenceanalysis.run.analysis.BcftoolsFillTagsStep;
+import org.labkey.sequenceanalysis.run.analysis.BcftoolsFixploidyStep;
+import org.labkey.sequenceanalysis.run.analysis.DeepVariantAnalysis;
+import org.labkey.sequenceanalysis.run.analysis.ExportOverlappingReadsAnalysis;
+import org.labkey.sequenceanalysis.run.analysis.GenrichStep;
+import org.labkey.sequenceanalysis.run.analysis.HaplotypeCallerAnalysis;
+import org.labkey.sequenceanalysis.run.analysis.ImmunoGenotypingAnalysis;
+import org.labkey.sequenceanalysis.run.analysis.LofreqAnalysis;
+import org.labkey.sequenceanalysis.run.analysis.MergeLoFreqVcfHandler;
+import org.labkey.sequenceanalysis.run.analysis.NextCladeHandler;
+import org.labkey.sequenceanalysis.run.analysis.PARalyzerAnalysis;
+import org.labkey.sequenceanalysis.run.analysis.PangolinHandler;
+import org.labkey.sequenceanalysis.run.analysis.PbsvAnalysis;
+import org.labkey.sequenceanalysis.run.analysis.PbsvJointCallingHandler;
+import org.labkey.sequenceanalysis.run.analysis.PindelAnalysis;
+import org.labkey.sequenceanalysis.run.analysis.SequenceBasedTypingAnalysis;
+import org.labkey.sequenceanalysis.run.analysis.SnpCountAnalysis;
+import org.labkey.sequenceanalysis.run.analysis.SubreadAnalysis;
+import org.labkey.sequenceanalysis.run.analysis.UnmappedReadExportHandler;
+import org.labkey.sequenceanalysis.run.analysis.ViralAnalysis;
 import org.labkey.sequenceanalysis.run.assembly.TrinityRunner;
 import org.labkey.sequenceanalysis.run.bampostprocessing.AddOrReplaceReadGroupsStep;
 import org.labkey.sequenceanalysis.run.bampostprocessing.BaseQualityScoreRecalibrator;
@@ -116,7 +157,27 @@
 import org.labkey.sequenceanalysis.run.util.GenomicsDBAppendHandler;
 import org.labkey.sequenceanalysis.run.util.GenomicsDBImportHandler;
 import org.labkey.sequenceanalysis.run.util.SVAnnotateStep;
-import org.labkey.sequenceanalysis.run.variant.*;
+import org.labkey.sequenceanalysis.run.variant.DepthOfCoverageHandler;
+import org.labkey.sequenceanalysis.run.variant.GenotypeConcordanceStep;
+import org.labkey.sequenceanalysis.run.variant.GenotypeFiltrationStep;
+import org.labkey.sequenceanalysis.run.variant.KingInferenceStep;
+import org.labkey.sequenceanalysis.run.variant.MendelianViolationReportStep;
+import org.labkey.sequenceanalysis.run.variant.MergeVcfsAndGenotypesHandler;
+import org.labkey.sequenceanalysis.run.variant.MultiAllelicPositionsHandler;
+import org.labkey.sequenceanalysis.run.variant.PlinkPcaStep;
+import org.labkey.sequenceanalysis.run.variant.SNPEffStep;
+import org.labkey.sequenceanalysis.run.variant.SampleRenameStep;
+import org.labkey.sequenceanalysis.run.variant.SelectSNVsStep;
+import org.labkey.sequenceanalysis.run.variant.SelectSamplesStep;
+import org.labkey.sequenceanalysis.run.variant.SelectVariantsStep;
+import org.labkey.sequenceanalysis.run.variant.SplitVcfBySamplesStep;
+import org.labkey.sequenceanalysis.run.variant.SummarizeGenotypeQualityStep;
+import org.labkey.sequenceanalysis.run.variant.VariantAnnotatorStep;
+import org.labkey.sequenceanalysis.run.variant.VariantEvalBySampleStep;
+import org.labkey.sequenceanalysis.run.variant.VariantEvalStep;
+import org.labkey.sequenceanalysis.run.variant.VariantFiltrationStep;
+import org.labkey.sequenceanalysis.run.variant.VariantQCStep;
+import org.labkey.sequenceanalysis.run.variant.VariantsToTableStep;
 import org.labkey.sequenceanalysis.util.Barcoder;
 import org.labkey.sequenceanalysis.util.ChainFileValidator;
 import org.labkey.sequenceanalysis.util.ScatterGatherUtils;
@@ -237,6 +298,7 @@ public static void registerPipelineSteps()
         SequencePipelineService.get().registerPipelineStep(new BowtieWrapper.Provider());
         SequencePipelineService.get().registerPipelineStep(new Bowtie2Wrapper.Provider());
         SequencePipelineService.get().registerPipelineStep(new BWAMemWrapper.Provider());
+        SequencePipelineService.get().registerPipelineStep(new BWAMem2Wrapper.Provider());
         SequencePipelineService.get().registerPipelineStep(new BWAWrapper.Provider());
         SequencePipelineService.get().registerPipelineStep(new BWASWWrapper.Provider());
         SequencePipelineService.get().registerPipelineStep(new MosaikWrapper.Provider());
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java
index 8c9142869..9716cac61 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java
@@ -8,6 +8,7 @@
 import org.jetbrains.annotations.Nullable;
 import org.junit.Assert;
 import org.junit.Test;
+import org.labkey.api.data.Container;
 import org.labkey.api.pipeline.PipelineJob;
 import org.labkey.api.pipeline.PipelineJobException;
 import org.labkey.api.pipeline.PipelineJobService;
@@ -43,6 +44,7 @@
 import java.lang.reflect.ParameterizedType;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -457,6 +459,29 @@ public String getDockerCommand()
     {
         return "docker";
     }
 
+    @Override
+    public Collection<String> getDockerVolumes(Container c)
+    {
+        if (PipelineJobService.get().getLocationType() != PipelineJobService.LocationType.WebServer)
+        {
+            throw new IllegalArgumentException("SequencePipelineService.getDockerVolumes() should only be called from the webserver!");
+        }
+
+        Set<String> volumeLines = new HashSet<>();
+        for (JobResourceSettings settings : SequencePipelineServiceImpl.get().getResourceSettings())
+        {
+            if (settings.isAvailable(c))
+            {
+                for (String volume : settings.getDockerVolumes(c))
+                {
+                    volumeLines.add("-v '" + volume + "':'" + volume + "'");
+                }
+            }
+        }
+
+        return volumeLines;
+    }
+
     @Override
     public List<File> getSequenceJobInputFiles(PipelineJob job)
     {
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/DeepVariantHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/DeepVariantHandler.java
index 249e53344..04d2c919e 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/DeepVariantHandler.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/DeepVariantHandler.java
@@ -125,7 +125,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx)
             }
 
             boolean retainVcf = ctx.getParams().optBoolean("retainVcf", false);
-            wrapper.execute(so.getFile(), referenceGenome.getWorkingFastaFile(), outputFile, retainVcf, ctx.getFileManager(), binVersion, args);
+            wrapper.execute(so.getFile(), referenceGenome.getWorkingFastaFile(), outputFile, retainVcf, ctx.getFileManager(), binVersion, args, ctx);
 
             action.addOutput(outputFile, "gVCF File", false);
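The SequencePipelineServiceImpl.getDockerVolumes() implementation above intentionally refuses to run outside the webserver, since JobResourceSettings are only registered there; each configured path is mapped to the same location inside the container. A standalone illustration of the formatting rule (the path is hypothetical):

public class DockerVolumeLineDemo
{
    public static void main(String[] args)
    {
        String volume = "/mnt/scratch"; // a path supplied by JobResourceSettings.getDockerVolumes(c)
        String line = "-v '" + volume + "':'" + volume + "'";
        System.out.println(line); // prints: -v '/mnt/scratch':'/mnt/scratch'
    }
}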
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java
index 7d6f6e9e1..61ce01cd2 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/GLNexusHandler.java
@@ -191,7 +191,7 @@ else if (genomeIds.isEmpty())
         {
             ctx.getLogger().debug("Running GLNexus for contig: " + r.getSequenceName());
             ctx.getJob().setStatus(PipelineJob.TaskStatus.running, "Processing: " + r.getSequenceName());
-            new GLNexusWrapper(ctx.getLogger()).execute(inputVcfs, contigVcf, ctx.getFileManager(), binVersion, configType, r);
+            new GLNexusWrapper(ctx.getLogger()).execute(inputVcfs, contigVcf, ctx.getFileManager(), binVersion, configType, r, ctx);
             vcfs.add(contigVcf);
 
             try
             {
@@ -261,7 +261,7 @@ private File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTracker tracker)
         }
     }
 
-    public void execute(List<File> inputGvcfs, File outputVcf, PipelineOutputTracker tracker, String binVersion, String configType, SAMSequenceRecord rec) throws PipelineJobException
+    public void execute(List<File> inputGvcfs, File outputVcf, PipelineOutputTracker tracker, String binVersion, String configType, SAMSequenceRecord rec, JobContext ctx) throws PipelineJobException
     {
         File workDir = outputVcf.getParentFile();
         tracker.addIntermediateFile(outputVcf);
@@ -291,6 +291,7 @@ public void execute(List<File> inputGvcfs, File outputVcf, PipelineOutputTracker tracker, String binVersion, String configType, SAMSequenceRecord rec) throws PipelineJobException
         writer.println("sudo $DOCKER run --rm=true \\");
         writer.println("\t-v \"${WD}:/work\" \\");
         writer.println("\t-v \"${HOME}:/homeDir\" \\");
+        ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\"));
         writer.println("\t -w /work \\");
         if (!StringUtils.isEmpty(System.getenv("TMPDIR")))
         {
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java
index 9616213a2..2b5abaa76 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java
@@ -13,6 +13,7 @@
 import htsjdk.variant.vcf.VCFReader;
 import org.apache.commons.io.FileUtils;
 import org.apache.logging.log4j.Logger;
+import org.jetbrains.annotations.Nullable;
 import org.json.JSONObject;
 import org.labkey.api.module.ModuleLoader;
 import org.labkey.api.pipeline.PipelineJob;
@@ -36,7 +37,6 @@
 import java.io.File;
 import java.io.IOException;
 import java.io.PrintWriter;
-import java.nio.file.StandardCopyOption;
 import java.nio.file.StandardOpenOption;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -107,11 +107,11 @@ public void init(JobContext ctx, List<SequenceOutputFile> inputFiles, List<RecordedAction> actions)
             Set<String> distinctLibraries = rgs.stream().map(SAMReadGroupRecord::getLibrary).collect(Collectors.toSet());
             if (distinctLibraries.size() > 1)
             {
-                throw new PipelineJobException("File has more than one library in read group(s), found: " + distinctLibraries.stream().collect(Collectors.joining(", ")));
+                throw new PipelineJobException("File has more than one library in read group(s), found: " + String.join(", ", distinctLibraries));
             }
 
             Set<String> distinctSamples = rgs.stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet());
             if (distinctSamples.size() > 1)
             {
-                throw new PipelineJobException("File has more than one sample in read group(s), found: " + distinctSamples.stream().collect(Collectors.joining(", ")));
+                throw new PipelineJobException("File has more than one sample in read group(s), found: " + String.join(", ", distinctSamples));
             }
 
             if (
-                distinctLibraries.stream().filter(x -> !x.equals(newRsName)).count() == 0L &&
-                distinctSamples.stream().filter(x -> !x.equals(newRsName)).count() == 0L
+                distinctLibraries.stream().allMatch(x -> x.equals(newRsName)) &&
+                distinctSamples.stream().allMatch(x -> x.equals(newRsName))
             )
             {
                 throw new PipelineJobException("Sample and library names match in read group(s), aborting");
             }
 
+            log.info("Readset name and header do not match: " + newRsName + " / existing library: " + distinctLibraries.stream().distinct().collect(Collectors.joining()) + ", existing sample: " + distinctSamples.stream().distinct().collect(Collectors.joining()));
+
             return header;
         }
         catch (IOException e)
@@ -156,7 +158,7 @@ private SAMFileHeader getAndValidateHeaderForBam(SequenceOutputFile so, String newRsName)
         }
     }
 
-    private VCFHeader getAndValidateHeaderForVcf(SequenceOutputFile so, String newRsName) throws PipelineJobException
+    private VCFHeader getAndValidateHeaderForVcf(SequenceOutputFile so, String newRsName, Logger log) throws PipelineJobException
     {
         try (VCFReader reader = new VCFFileReader(so.getFile()))
         {
@@ -173,6 +175,8 @@
                 throw new PipelineJobException("Sample names match, aborting");
             }
 
+            log.info("Readset name and header do not match: " + newRsName + " / " + existingSample);
+
             return header;
         }
         catch (IOException e)
@@ -209,7 +213,7 @@ else if (SequenceUtil.FILETYPE.gvcf.getFileType().isType(so.getFile()) | SequenceUtil.FILETYPE.vcf.getFileType().isType(so.getFile()))
 
     private void reheaderVcf(SequenceOutputFile so, JobContext ctx, String newRsName) throws PipelineJobException
     {
-        VCFHeader header = getAndValidateHeaderForVcf(so, newRsName);
+        VCFHeader header = getAndValidateHeaderForVcf(so, newRsName, ctx.getLogger());
 
         String existingSample = header.getGenotypeSamples().get(0);
         File sampleNamesFile = new File(ctx.getWorkingDirectory(), "sampleNames.txt");
@@ -257,7 +261,7 @@
             }
 
             FileUtils.moveFile(outputIdx, inputIndex);
-            addTracker(so, existingSample, newRsName);
+            addTracker(so, existingSample, newRsName, null);
         }
         catch (IOException e)
         {
@@ -265,7 +269,7 @@
         }
     }
 
-    private void addTracker(SequenceOutputFile so, String existingSample, String newRsName) throws IOException
+    private void addTracker(SequenceOutputFile so, String existingSample, String newRsName, @Nullable String existingLibrary) throws IOException
     {
         File tracker = new File(so.getFile().getParentFile(), "reheaderHistory.txt");
         boolean preExisting = tracker.exists();
@@ -278,10 +282,10 @@
         {
             if (!preExisting)
             {
-                writer.println("OriginalSample\tNewSample");
+                writer.println("OriginalSample\tNewSample\tOriginalLibrary");
             }
 
-            writer.println(existingSample + "\t" + newRsName);
+            writer.println(existingSample + "\t" + newRsName + "\t" + (existingLibrary == null ? "N/A" : existingLibrary));
         }
     }
 
@@ -289,7 +293,7 @@ private void reheaderBamOrCram(SequenceOutputFile so, JobContext ctx, String newRsName)
     {
         try
         {
-            SAMFileHeader header = getAndValidateHeaderForBam(so, newRsName);
+            SAMFileHeader header = getAndValidateHeaderForBam(so, newRsName, ctx.getLogger());
 
             List<SAMReadGroupRecord> rgs = header.getReadGroups();
             String existingSample = rgs.get(0).getSample();
@@ -341,7 +345,7 @@
             }
 
             FileUtils.moveFile(outputIdx, inputIndex);
-            addTracker(so, existingSample, newRsName);
+            addTracker(so, existingSample, newRsName, existingLibrary);
         }
         catch (IOException e)
         {
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CacheGenomePipelineJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CacheGenomePipelineJob.java
index b019b876c..f59d13986 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CacheGenomePipelineJob.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CacheGenomePipelineJob.java
@@ -146,7 +146,7 @@ public List<String> getProtocolActionNames()
     }
 
     @Override
-    public PipelineJob.Task createTask(PipelineJob job)
+    public PipelineJob.Task<?> createTask(PipelineJob job)
     {
         return new CacheGenomesTask(this, job);
     }
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/JobContextImpl.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/JobContextImpl.java
index cd40c8a6f..1c09535e6 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/JobContextImpl.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/JobContextImpl.java
@@ -14,6 +14,7 @@
 
 import java.io.File;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.LinkedHashSet;
 
 /**
@@ -128,4 +129,9 @@ public LinkedHashSet<RecordedAction> getActions()
     {
         return _actions;
     }
+
+    public Collection<String> getDockerVolumes()
+    {
+        return _job.getDockerVolumes();
+    }
 }
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java
index 0ef25c152..9d1998016 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJob.java
@@ -36,6 +36,7 @@
 import org.labkey.api.sequenceanalysis.SequenceOutputFile;
 import org.labkey.api.sequenceanalysis.pipeline.HasJobParams;
 import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputTracker;
+import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
 import org.labkey.api.settings.AppProps;
 import org.labkey.api.util.FileType;
 import org.labkey.api.util.FileUtil;
@@ -74,6 +75,7 @@ public class SequenceJob extends PipelineJob implements FileAnalysisJobSupport, HasJobParams
     private List<File> _inputFiles;
     private List<SequenceOutputFile> _outputsToCreate = new ArrayList<>();
     private PipeRoot _folderFileRoot;
+    private Collection<String> _dockerVolumes;
 
     transient private JSONObject _params;
 
@@ -105,6 +107,7 @@ protected SequenceJob(SequenceJob parentJob, String jobName, String subdirectory)
         _folderPrefix = parentJob._folderPrefix;
         _inputFiles = parentJob._inputFiles;
         _folderFileRoot = parentJob._folderFileRoot;
+        _dockerVolumes = parentJob._dockerVolumes;
 
         _params = parentJob.getParameterJson();
 
@@ -134,6 +137,7 @@ public SequenceJob(String providerName, Container c, User u, @Nullable String jobName
         writeParameters(params);
 
         _folderFileRoot = c.isWorkbook() ? PipelineService.get().findPipelineRoot(c.getParent()) : pipeRoot;
+        _dockerVolumes = SequencePipelineService.get().getDockerVolumes(c);
 
         setLogFile(_getLogFile());
         writeSupportToDisk();
@@ -184,6 +188,16 @@ public void setFolderFileRoot(PipeRoot folderFileRoot)
     {
         _folderFileRoot = folderFileRoot;
     }
 
+    public Collection<String> getDockerVolumes()
+    {
+        return _dockerVolumes == null ? Collections.emptySet() : Collections.unmodifiableCollection(_dockerVolumes);
+    }
+
+    public void setDockerVolumes(Collection<String> dockerVolumes)
+    {
+        _dockerVolumes = dockerVolumes;
+    }
+
     public void setDescription(String description)
     {
         _description = description;
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceTaskHelper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceTaskHelper.java
index 8ee1415e6..97b1ad53f 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceTaskHelper.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceTaskHelper.java
@@ -50,6 +50,7 @@
 import java.io.IOException;
 import java.net.InetAddress;
 import java.net.UnknownHostException;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
@@ -344,4 +345,9 @@ public void cacheExpDatasForParams() throws PipelineJobException
             }
         }
     }
+
+    public Collection<String> getDockerVolumes()
+    {
+        return _job.getDockerVolumes();
+    }
 }
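Because the volume list can only be computed on the webserver, SequenceJob resolves it once in its constructor and serializes it with the job; JobContextImpl and SequenceTaskHelper then simply delegate to the deserialized job on the remote side. A condensed sketch of the round trip, restricted to the calls shown in this diff:

// webserver, at job submission (SequenceJob constructor):
_dockerVolumes = SequencePipelineService.get().getDockerVolumes(c);

// remote pipeline node, when a task writes a docker wrapper script:
for (String mountLine : ctx.getDockerVolumes()) // JobContextImpl -> SequenceJob.getDockerVolumes()
{
    writer.println(mountLine + " \\");
}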
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java
new file mode 100644
index 000000000..e57fdf757
--- /dev/null
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java
@@ -0,0 +1,83 @@
+package org.labkey.sequenceanalysis.run.alignment;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.logging.log4j.Logger;
+import org.jetbrains.annotations.Nullable;
+import org.json.JSONObject;
+import org.labkey.api.pipeline.PipelineJob;
+import org.labkey.api.pipeline.PipelineJobException;
+import org.labkey.api.sequenceanalysis.model.Readset;
+import org.labkey.api.sequenceanalysis.pipeline.AbstractAlignmentStepProvider;
+import org.labkey.api.sequenceanalysis.pipeline.AlignmentOutputImpl;
+import org.labkey.api.sequenceanalysis.pipeline.AlignmentStep;
+import org.labkey.api.sequenceanalysis.pipeline.AlignmentStepProvider;
+import org.labkey.api.sequenceanalysis.pipeline.CommandLineParam;
+import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
+import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
+import org.labkey.api.sequenceanalysis.pipeline.SamtoolsRunner;
+import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
+import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
+import org.labkey.api.util.FileUtil;
+import org.labkey.sequenceanalysis.util.SequenceUtil;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * User: bimber
+ * Date: 6/14/2014
+ * Time: 8:35 AM
+ */
+public class BWAMem2Wrapper extends BWAMemWrapper
+{
+    public BWAMem2Wrapper(@Nullable Logger logger)
+    {
+        super(logger);
+    }
+
+    public static class BWAMem2AlignmentStep extends BWAAlignmentStep<BWAMem2Wrapper>
+    {
+        public BWAMem2AlignmentStep(AlignmentStepProvider provider, PipelineContext ctx)
+        {
+            super(provider, ctx, new BWAMem2Wrapper(ctx.getLogger()));
+        }
+
+        @Override
+        public boolean doAddReadGroups()
+        {
+            return false;
+        }
+    }
+
+    public static class Provider extends AbstractAlignmentStepProvider<AlignmentStep>
+    {
+        public Provider()
+        {
+            super("BWA-Mem2", null, Arrays.asList(
+                    ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("-a"), "outputAll", "Output All Hits", "Output all found alignments for single-end or unpaired paired-end reads. These alignments will be flagged as secondary alignments.", "checkbox", new JSONObject(){{
+                        put("checked", false);
+                    }}, true),
+                    ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("-M"), "markSplit", "Mark Shorter Hits As Secondary", "Mark shorter split hits as secondary (for Picard compatibility).", "checkbox", new JSONObject(){{
+                        put("checked", true);
+                    }}, true),
+                    ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("-k"), "minSeedLength", "Min Seed Length", "Matches shorter than this value will be missed. The alignment speed is usually insensitive to this value unless it significantly deviates from 20. Default value: 19", "ldk-integerfield", new JSONObject(){{
+
+                    }}, null)
+            ), null, "https://github.com/bwa-mem2/bwa-mem2", true, true);
+        }
+
+        @Override
+        public BWAMem2AlignmentStep create(PipelineContext context)
+        {
+            return new BWAMem2AlignmentStep(this, context);
+        }
+    }
+
+    @Override
+    public File getExe()
+    {
+        return SequencePipelineService.get().getExeForPackage("BWAPATH", "bwa-mem2");
+    }
+}
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java
index a6f4605a6..fa929a0ad 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java
@@ -18,6 +18,7 @@
 import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
 import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
 import org.labkey.api.sequenceanalysis.run.DockerWrapper;
+import org.labkey.api.sequenceanalysis.run.SelectVariantsWrapper;
 import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
 import org.labkey.api.util.FileUtil;
 import org.labkey.api.writer.PrintWriters;
@@ -42,7 +43,13 @@ public ParagraphStep()
                 ToolParameterDescriptor.createExpDataParam("svVCF", "Input VCF", "This is the DataId of the VCF containing the SVs to genotype", "ldk-expdatafield", new JSONObject()
                 {{
                     put("allowBlank", false);
-                }}, null)
+                }}, null),
+                ToolParameterDescriptor.create("doBndSubset", "Remove BNDs", "If the reference VCF contains BNDs, selecting this option will cause the job to remove them prior to paragraph", "checkbox", new JSONObject(){{
+                    put("checked", false);
+                }}, false),
+                ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{
+                    put("checked", false);
+                }}, false)
         ));
     }
@@ -103,6 +110,33 @@ else if (!svVcf.exists())
             throw new PipelineJobException("Missing file: " + svVcf.getPath());
         }
 
+        boolean doBndSubset = ctx.getParams().optBoolean("doBndSubset", false);
+        if (doBndSubset)
+        {
+            File vcfNoBnd = new File(ctx.getOutputDir(), SequenceAnalysisService.get().getUnzippedBaseName(svVcf.getName()) + ".pgSubset.vcf.gz");
+            File vcfNoBndIdx = new File(vcfNoBnd.getPath() + ".tbi");
+            if (vcfNoBndIdx.exists())
+            {
+                ctx.getLogger().debug("Index exists, will not repeat VCF subset");
+            }
+            else
+            {
+                SelectVariantsWrapper svw = new SelectVariantsWrapper(ctx.getLogger());
+                List<String> selectArgs = new ArrayList<>();
+                selectArgs.add("-select");
+                selectArgs.add("SVTYPE != 'BND' && POS > 150 && !(vc.hasAttribute('SVTYPE') && vc.getAttribute('SVTYPE') == 'INS' && vc.hasSymbolicAlleles() && !vc.hasAttribute('SEQ'))");
+                selectArgs.add("--exclude-filtered");
+                selectArgs.add("--sites-only-vcf-output");
+
+                svw.execute(ctx.getSequenceSupport().getCachedGenome(inputFiles.get(0).getLibrary_id()).getWorkingFastaFile(), svVcf, vcfNoBnd, selectArgs);
+
+                ctx.getFileManager().addIntermediateFile(vcfNoBnd);
+                ctx.getFileManager().addIntermediateFile(vcfNoBndIdx);
+            }
+
+            svVcf = vcfNoBnd;
+        }
+
         Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger());
         for (SequenceOutputFile so : inputFiles)
         {
@@ -140,7 +174,7 @@ else if (!svVcf.exists())
             try (PrintWriter writer = PrintWriters.getPrintWriter(coverageFile); SamReader reader = SamReaderFactory.makeDefault().open(so.getFile()))
             {
                 SAMFileHeader header = reader.getFileHeader();
-                if (header.getReadGroups().size() == 0)
+                if (header.getReadGroups().isEmpty())
                 {
                     throw new PipelineJobException("No read groups found in input BAM");
                 }
@@ -170,7 +204,7 @@ else if (!svVcf.exists())
             }
             ctx.getFileManager().addIntermediateFile(coverageFile);
 
-            DockerWrapper dockerWrapper = new DockerWrapper("ghcr.io/bimberlabinternal/paragraph:latest", ctx.getLogger());
+            DockerWrapper dockerWrapper = new DockerWrapper("ghcr.io/bimberlabinternal/paragraph:latest", ctx.getLogger(), ctx);
 
             List<String> paragraphArgs = new ArrayList<>();
             paragraphArgs.add("/opt/paragraph/bin/multigrmpy.py");
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java
index b49680702..0490ed063 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/DeepVariantAnalysis.java
@@ -158,7 +158,7 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, ReferenceGenome referenceGenome, File outputDir)
         getWrapper().setOutputDir(outputDir);
         getWrapper().setWorkingDir(outputDir);
 
-        getWrapper().execute(inputBam, referenceGenome.getWorkingFastaFile(), outputFile, retainVcf, output, binVersion, args);
+        getWrapper().execute(inputBam, referenceGenome.getWorkingFastaFile(), outputFile, retainVcf, output, binVersion, args, getPipelineCtx());
 
         output.addOutput(outputFile, "gVCF File");
         output.addSequenceOutput(outputFile, outputFile.getName(), "DeepVariant gVCF File", rs.getReadsetId(), null, referenceGenome.getGenomeId(), "DeepVariant Version: " + binVersion);
@@ -220,7 +220,7 @@ private File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTracker tracker)
         }
     }
 
-    public void execute(File inputBam, File refFasta, File outputGvcf, boolean retainVcf, PipelineOutputTracker tracker, String binVersion, List<String> extraArgs) throws PipelineJobException
+    public void execute(File inputBam, File refFasta, File outputGvcf, boolean retainVcf, PipelineOutputTracker tracker, String binVersion, List<String> extraArgs, PipelineContext ctx) throws PipelineJobException
     {
         File workDir = outputGvcf.getParentFile();
         File outputVcf = new File(outputGvcf.getPath().replaceAll(".g.vcf", ".vcf"));
@@ -270,6 +270,7 @@ public void execute(File inputBam, File refFasta, File outputGvcf, boolean retainVcf, PipelineOutputTracker tracker, String binVersion, List<String> extraArgs) throws PipelineJobException
         writer.println("sudo $DOCKER run --rm=true \\");
         writer.println("\t-v \"${WD}:/work\" \\");
         writer.println("\t-v \"${HOME}:/homeDir\" \\");
+        ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\"));
         if (!StringUtils.isEmpty(System.getenv("TMPDIR")))
         {
             writer.println("\t-v \"${TMPDIR}:/tmp\" \\");
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/LofreqAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/LofreqAnalysis.java
index dc1f43a6c..b561b320b 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/LofreqAnalysis.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/LofreqAnalysis.java
@@ -779,9 +779,9 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, ReferenceGenome referenceGenome, File outputDir)
         if (runPangolinAndNextClade)
         {
             PangolinHandler.PANGO_MODE pangoMode = PangolinHandler.PANGO_MODE.valueOf(getProvider().getParameterByName(PangolinHandler.PANGO_MODE.class.getSimpleName()).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class, PangolinHandler.PANGO_MODE.both.name()));
-            pangolinData = PangolinHandler.runPangolin(outputDir, consensusFastaLoFreq, output, getPipelineCtx().getLogger(), pangoMode);
+            pangolinData = PangolinHandler.runPangolin(outputDir, consensusFastaLoFreq, output, getPipelineCtx().getLogger(), pangoMode, getPipelineCtx());
 
-            File json = NextCladeHandler.runNextClade(consensusFastaLoFreq, getPipelineCtx().getLogger(), output, outputDir);
+            File json = NextCladeHandler.runNextClade(consensusFastaLoFreq, getPipelineCtx().getLogger(), output, outputDir, getPipelineCtx());
             output.addSequenceOutput(json, "Nextclade: " + rs.getName(), "NextClade JSON", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null);
         }
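Every handler that writes its own docker wrapper script (GLNexus and DeepVariant above, NextClade and Pangolin below) gains the same one-line mount injection. With a context exposing a single volume, the emitted script would look roughly like this (the /mnt/genomes path is illustrative):

// Java side, inside the script-writing block:
writer.println("sudo $DOCKER run --rm=true \\");
writer.println("\t-v \"${WD}:/work\" \\");
ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\"));
// resulting shell script fragment:
//   sudo $DOCKER run --rm=true \
//       -v "${WD}:/work" \
//       -v '/mnt/genomes':'/mnt/genomes' \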
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java
index df8ab3ee7..6e5e4320d 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/NextCladeHandler.java
@@ -26,6 +26,7 @@
 import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
 import org.labkey.api.sequenceanalysis.SequenceOutputFile;
 import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
+import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
 import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker;
 import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
 import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
@@ -127,7 +128,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx)
     {
         for (SequenceOutputFile so : inputFiles)
         {
-            File nextCladeJson = runNextClade(so.getFile(), ctx.getLogger(), ctx.getFileManager(), ctx.getWorkingDirectory());
+            File nextCladeJson = runNextClade(so.getFile(), ctx.getLogger(), ctx.getFileManager(), ctx.getWorkingDirectory(), ctx);
             ctx.getFileManager().addSequenceOutput(nextCladeJson, "Nextclade: " + so.getName(), NEXTCLADE_JSON, so.getReadset(), null, so.getLibrary_id(), null);
         }
     }
@@ -138,7 +139,7 @@
     public static File getJsonFile(File outputDir, File consensusFasta)
     {
         return new File(outputDir, FileUtil.getBaseName(consensusFasta) + ".json");
     }
 
-    public static File runNextClade(File consensusFasta, Logger log, PipelineOutputTracker tracker, File outputDir) throws PipelineJobException
+    public static File runNextClade(File consensusFasta, Logger log, PipelineOutputTracker tracker, File outputDir, PipelineContext ctx) throws PipelineJobException
     {
         if (!consensusFasta.getParentFile().equals(outputDir))
         {
@@ -183,6 +184,7 @@
         }
 
         writer.println("\t-v \"${WD}:/work\" \\");
+        ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\"));
         writer.println("\t-u $UID \\");
         writer.println("\t-e USERID=$UID \\");
         writer.println("\t-w /work \\");
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java
index 00410a095..56608dde2 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java
@@ -26,6 +26,7 @@
 import org.labkey.api.reader.Readers;
 import org.labkey.api.sequenceanalysis.SequenceOutputFile;
 import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
+import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
 import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker;
 import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
 import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
@@ -235,7 +236,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx)
         for (SequenceOutputFile so : inputFiles)
         {
             PangolinHandler.PANGO_MODE pangoMode = PangolinHandler.PANGO_MODE.valueOf(ctx.getParams().optString(PangolinHandler.PANGO_MODE.class.getSimpleName(), PANGO_MODE.both.name()));
-            Map<String, String> pangolinData = runPangolin(ctx.getWorkingDirectory(), so.getFile(), ctx.getFileManager(), ctx.getLogger(), pangoMode);
+            Map<String, String> pangolinData = runPangolin(ctx.getWorkingDirectory(), so.getFile(), ctx.getFileManager(), ctx.getLogger(), pangoMode, ctx);
             List<String> vals = new ArrayList<>();
             vals.add(String.valueOf(so.getRowid()));
             for (String key : PANGO_FIELDS)
@@ -263,7 +264,7 @@
     public static File getRenamedPangolinOutput(File consensusFasta, PANGO_MODE mode)
     {
         return new File(consensusFasta.getParentFile(), FileUtil.getBaseName(consensusFasta) + "." + mode.name() + ".pangolin.csv");
     }
 
-    private static File runUsingDocker(File outputDir, Logger log, File consensusFasta, PipelineOutputTracker tracker, List<String> extraArgs) throws PipelineJobException
+    private static File runUsingDocker(File outputDir, Logger log, File consensusFasta, PipelineOutputTracker tracker, List<String> extraArgs, PipelineContext ctx) throws PipelineJobException
     {
         if (!consensusFasta.getParentFile().equals(outputDir))
         {
@@ -307,6 +308,7 @@
         String extraArgString = extraArgs == null ? "" : " " + StringUtils.join(extraArgs, " ");
 
         writer.println("\t-v \"${WD}:/work\" \\");
+        ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\"));
         writer.println("\t-u $UID \\");
         writer.println("\t-e USERID=$UID \\");
         writer.println("\t-w /work \\");
@@ -335,7 +337,7 @@
         return output;
     }
 
-    public static Map<String, String> runPangolin(File workDir, File consensusFasta, PipelineOutputTracker tracker, Logger log, PANGO_MODE pangoMode) throws PipelineJobException
+    public static Map<String, String> runPangolin(File workDir, File consensusFasta, PipelineOutputTracker tracker, Logger log, PANGO_MODE pangoMode, PipelineContext ctx) throws PipelineJobException
     {
         List<PANGO_MODE> modes = PANGO_MODE.getModes(pangoMode);
 
@@ -344,7 +346,7 @@
         for (PANGO_MODE mode : modes)
         {
             List<String> extraArgs = mode == PANGO_MODE.usher ? Collections.singletonList("--usher") : null;
-            File output = runUsingDocker(workDir, log, consensusFasta, tracker, extraArgs);
+            File output = runUsingDocker(workDir, log, consensusFasta, tracker, extraArgs, ctx);
 
             try
             {
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAlignmentAggregator.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAlignmentAggregator.java
index 33c559daf..d2273f700 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAlignmentAggregator.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAlignmentAggregator.java
@@ -17,24 +17,19 @@
 
 import au.com.bytecode.opencsv.CSVReader;
 import au.com.bytecode.opencsv.CSVWriter;
-import htsjdk.samtools.SAMFormatException;
 import htsjdk.samtools.SAMRecord;
-import htsjdk.samtools.SAMRecordIterator;
-import htsjdk.samtools.SamReader;
-import htsjdk.samtools.SamReaderFactory;
-import htsjdk.samtools.ValidationStringency;
 import htsjdk.samtools.fastq.FastqReader;
 import htsjdk.samtools.fastq.FastqRecord;
 import htsjdk.samtools.fastq.FastqWriter;
 import htsjdk.samtools.fastq.FastqWriterFactory;
 import htsjdk.samtools.reference.IndexedFastaSequenceFile;
 import htsjdk.samtools.reference.ReferenceSequence;
+import htsjdk.samtools.util.IOUtil;
 import htsjdk.samtools.util.Interval;
 import htsjdk.samtools.util.IntervalList;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.logging.log4j.Logger;
-import org.apache.logging.log4j.LogManager;
 import org.labkey.api.data.Container;
 import org.labkey.api.data.DbScope;
 import org.labkey.api.data.SimpleFilter;
@@ -46,12 +41,9 @@
 import org.labkey.api.reader.Readers;
 import org.labkey.api.security.User;
 import org.labkey.api.sequenceanalysis.model.AnalysisModel;
-import org.labkey.api.util.FileType;
 import org.labkey.api.util.Pair;
-import org.labkey.api.util.StringUtilsLabKey;
 import org.labkey.api.writer.PrintWriters;
 import org.labkey.sequenceanalysis.SequenceAnalysisSchema;
-import org.labkey.sequenceanalysis.api.picard.CigarPositionIterable;
 import org.labkey.sequenceanalysis.run.alignment.FastqCollapser;
 import org.labkey.sequenceanalysis.run.util.FlashWrapper;
 import org.labkey.sequenceanalysis.run.util.NTSnp;
@@ -59,16 +51,9 @@
 import org.labkey.sequenceanalysis.util.SequenceUtil;
 
 import java.io.BufferedReader;
-import java.io.BufferedWriter;
 import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
 import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
-import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -81,7 +66,6 @@
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
-import java.util.zip.GZIPOutputStream;
 
 /**
  * User: bimber
@@ -303,22 +287,9 @@ public String getKey(SAMRecord record)
         ;
     }
 
-    public OutputStream getLogOutputStream(File outputLog) throws IOException
-    {
-        FileType gz = new FileType(".gz");
-        if (gz.isType(outputLog))
-        {
-            return new GZIPOutputStream(new FileOutputStream(outputLog));
-        }
-        else
-        {
-            return new FileOutputStream(outputLog);
-        }
-    }
-
     public Map<String, Integer> getAlignmentSummary(File outputLog) throws IOException, PipelineJobException
     {
-        try (CSVWriter writer = outputLog == null ? null : new CSVWriter(new BufferedWriter(new OutputStreamWriter(getLogOutputStream(outputLog), StandardCharsets.UTF_8)), '\t', CSVWriter.NO_QUOTE_CHARACTER))
+        try (CSVWriter writer = outputLog == null ? null : new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(outputLog), '\t', CSVWriter.NO_QUOTE_CHARACTER))
         {
             //these are stage-1 filters, filtering on the read-pair level
             Map<String, Integer> totals = doFilterStage1(writer);
@@ -899,7 +870,7 @@ private Map<String, Integer> doFilterStage4(CSVWriter writer, Map<String, Integer> stage3Totals)
         return stage4Totals;
     }
 
-    private class HitSet
+    private static class HitSet
     {
         public Set<String> readNames = new HashSet<>();
         public Set<String> refNames = new TreeSet<>();
@@ -1047,7 +1018,7 @@ public void writeOutput(User u, Container c, AnalysisModel model)
 
     public static void processSBTSummary(User u, Container c, AnalysisModel model, File output, File refFasta, Logger log) throws PipelineJobException
     {
-        try (CSVReader reader = new CSVReader(new BufferedReader(new InputStreamReader(new FileInputStream(output), StandardCharsets.UTF_8)), '\t', CSVWriter.DEFAULT_QUOTE_CHARACTER))
+        try (CSVReader reader = new CSVReader(IOUtil.openFileForBufferedUtf8Reading(output), '\t', CSVWriter.DEFAULT_QUOTE_CHARACTER))
         {
             try (DbScope.Transaction transaction = ExperimentService.get().ensureTransaction())
             {
@@ -1117,7 +1088,7 @@ public static void processSBTSummary(User u, Container c, AnalysisModel model, File output, File refFasta, Logger log) throws PipelineJobException
 
     public void writeTable(File output) throws PipelineJobException
     {
-        try (CSVWriter writer = new CSVWriter(PrintWriters.getPrintWriter(output), '\t'))
+        try (CSVWriter writer = new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(output), '\t'))
         {
             Map<String, Integer> map = writeSummary();
 
@@ -1326,7 +1297,7 @@ else if (f.getName().contains("_2"))
 
             //rename reads to make it easier to combine later
             File renamed = new File(outDir, basename + ".collapsed.tmp.fasta");
-            try (BufferedReader reader = Readers.getReader(collapsed);PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(renamed), StringUtilsLabKey.DEFAULT_CHARSET))))
+            try (BufferedReader reader = Readers.getReader(collapsed);PrintWriter writer = new PrintWriter(IOUtil.openFileForBufferedUtf8Writing(renamed)))
             {
                 String line;
                 while ((line = reader.readLine()) != null)
diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAnalysis.java
index 051a508f6..3ad16b82a 100644
--- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAnalysis.java
+++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SequenceBasedTypingAnalysis.java
@@ 
-211,12 +211,6 @@ public Output performAnalysisPerSampleLocal(AnalysisModel model, File inputBam, SequenceBasedTypingAlignmentAggregator.processSBTSummary(getPipelineCtx().getJob().getUser(), getPipelineCtx().getJob().getContainer(), model, expectedTxt, referenceFasta, getPipelineCtx().getLogger()); - File compressed = Compress.compressGzip(expectedTxt); - if (compressed.exists() && expectedTxt.exists()) - { - expectedTxt.delete(); - } - // Perform second pass to collapse groups: new AlignmentGroupCompare(model.getAnalysisId(), getPipelineCtx().getJob().getContainer(), getPipelineCtx().getJob().getUser()).collapseGroups(getPipelineCtx().getLogger(), getPipelineCtx().getJob().getUser()); } @@ -312,7 +306,8 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc //write output as TSV agg.writeTable(getSBTSummaryFile(outputDir, inputBam)); - output.addSequenceOutput(sbtOutputLog, "SBT Results: " + inputBam.getName(), "SBT Results", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null); + // This will be gzipped later: + output.addSequenceOutput(getSBTSummaryFile(outputDir, inputBam), "SBT Results: " + inputBam.getName(), "SBT Results", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null); //optionally output FASTQ of unmapped reads Double exportThreshold = getProvider().getParameterByName(EXPORT_UNMAPPED).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class); @@ -377,7 +372,7 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc protected File getSBTSummaryFile(File outputDir, File bam) { - return new File(outputDir, FileUtil.getBaseName(bam) + ".sbt_hits.txt"); + return new File(outputDir, FileUtil.getBaseName(bam) + ".sbt_hits.txt.gz"); } public static class AlignmentGroupCompare diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java index 0fbd9a075..6289f959e 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java @@ -515,7 +515,7 @@ else if (!samples.equals(header.getGenotypeSamples())) writer.write("} | bgzip -f" + (compressionLevel == null ? "" : " --compress-level 9") + (threads == null ? 
"" : " --threads " + threads) + " > '" + outputGzip.getPath() + "'\n"); } - File mergeDone = new File(outputGzip.getParentFile(), "merge.done"); + File mergeDone = new File(outputGzip.getPath() + ".merge.done"); if (mergeDone.exists()) { log.debug("Merge done file exists, will not repeat merge"); diff --git a/jbrowse/resources/views/genotypeTable.html b/jbrowse/resources/views/genotypeTable.html index 35dd8413e..ce6945c61 100644 --- a/jbrowse/resources/views/genotypeTable.html +++ b/jbrowse/resources/views/genotypeTable.html @@ -5,6 +5,24 @@ var start = LABKEY.ActionURL.getParameter("start"); var stop = LABKEY.ActionURL.getParameter("stop"); + if (!start || !stop) { + alert('Must provide a start and stop!') + return + } + + if (isNaN(start)) { + start = String(start).replaceAll(',', '') + } + + if (isNaN(stop)) { + stop = String(stop).replaceAll(',', '') + } + + if (isNaN(start) || isNaN(stop)) { + alert('Start and stop must be integers!') + return + } + LABKEY.Ajax.request({ url: LABKEY.ActionURL.buildURL('jbrowse', 'getGenotypes', null), method: 'POST', diff --git a/jbrowse/src/client/JBrowse/Browser/plugins/ExtendedVariantPlugin/ExtendedVariantWidget/ExtendedVariantWidget.tsx b/jbrowse/src/client/JBrowse/Browser/plugins/ExtendedVariantPlugin/ExtendedVariantWidget/ExtendedVariantWidget.tsx index 1fe878691..3c3ddced5 100644 --- a/jbrowse/src/client/JBrowse/Browser/plugins/ExtendedVariantPlugin/ExtendedVariantWidget/ExtendedVariantWidget.tsx +++ b/jbrowse/src/client/JBrowse/Browser/plugins/ExtendedVariantPlugin/ExtendedVariantWidget/ExtendedVariantWidget.tsx @@ -407,7 +407,7 @@ export default jbrowse => { feat["INFO"] = null return ( - + {message} rec.x > 0 & rec.x <= winWidth & rec.y > 0 & rec.y <= winHeight + ).count(); + } } diff --git a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java index d97f01de3..e5412f2d3 100644 --- a/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java +++ b/singlecell/api-src/org/labkey/api/singlecell/pipeline/AbstractSingleCellPipelineStep.java @@ -350,6 +350,7 @@ public static void executeR(SequenceOutputHandler.JobContext ctx, String dockerC File tmpDir = new File(SequencePipelineService.get().getJavaTempDir()); writer.println("\t-v \"${WD}:/work\" \\"); writer.println("\t-v \"" + tmpDir.getPath() + ":/tmp\" \\"); + ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); writer.println("\t-v \"${HOME}:/homeDir\" \\"); writer.println("\t-u $UID \\"); writer.println("\t-e USERID=$UID \\"); diff --git a/singlecell/resources/chunks/AppendNimble.R b/singlecell/resources/chunks/AppendNimble.R index 39b5d1635..040317e71 100644 --- a/singlecell/resources/chunks/AppendNimble.R +++ b/singlecell/resources/chunks/AppendNimble.R @@ -7,8 +7,8 @@ invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) # NOTE: this file is created by DownloadAndAppendNimble if there was an error. 
It might exist if a job failed and then was restarted -if (file.exists('debug.nimble.txt')) { - unlink('debug.nimble.txt') +if (file.exists('debug.nimble.txt.gz')) { + unlink('debug.nimble.txt.gz') } for (datasetId in names(seuratObjects)) { @@ -16,7 +16,8 @@ for (datasetId in names(seuratObjects)) { seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) for (genomeId in names(nimbleGenomes)) { - seuratObj <- Rdiscvr::DownloadAndAppendNimble(seuratObject = seuratObj, allowableGenomes = genomeId, ensureSamplesShareAllGenomes = ensureSamplesShareAllGenomes, targetAssayName = nimbleGenomes[[genomeId]], enforceUniqueFeatureNames = TRUE, dropAmbiguousFeatures = !retainAmbiguousFeatures, maxLibrarySizeRatio = maxLibrarySizeRatio) + maxAmbiguityAllowed <- nimbleGenomeAmbiguousPreference[[genomeId]] + seuratObj <- Rdiscvr::DownloadAndAppendNimble(seuratObject = seuratObj, allowableGenomes = genomeId, ensureSamplesShareAllGenomes = ensureSamplesShareAllGenomes, targetAssayName = nimbleGenomes[[genomeId]], enforceUniqueFeatureNames = TRUE, maxAmbiguityAllowed = maxAmbiguityAllowed, maxLibrarySizeRatio = maxLibrarySizeRatio) } saveData(seuratObj, datasetId) diff --git a/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js b/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js index 30dc0cd5c..f01e307ce 100644 --- a/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js +++ b/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js @@ -10,7 +10,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { initComponent: function(){ Ext4.apply(this, { style: 'padding: 10px;margins: 5px;', - minWidth: 650, + minWidth: 850, border: true, items: [{ html: 'This step will query nimble results for the selected genome(s). It will then append these results to the seurat object on the target assay.', @@ -20,7 +20,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { },{ xtype: 'ldk-gridpanel', clicksToEdit: 1, - width: 600, + width: 775, tbar: [{ text: 'Add', handler: function(btn){ @@ -40,7 +40,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { },LABKEY.ext4.GRIDBUTTONS.DELETERECORD()], store: { type: 'array', - fields: ['genomeId', 'targetAssay'] + fields: ['genomeId', 'targetAssay', 'maxAmbiguityAllowed'] }, columns: [{ dataIndex: 'genomeId', @@ -68,6 +68,15 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { xtype: 'textfield', allowBlank: false } + },{ + dataIndex: 'maxAmbiguityAllowed', + width: 175, + header: 'Max Ambiguity Allowed', + editor: { + xtype: 'ldk-integerfield', + allowBlank: true, + minValue: 0 + } }] }] }); @@ -78,7 +87,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { getValue: function(){ var ret = []; this.down('ldk-gridpanel').store.each(function(r, i) { - ret.push([r.data.genomeId, r.data.targetAssay]); + ret.push([r.data.genomeId, r.data.targetAssay, r.data.maxAmbiguityAllowed ?? '']); }, this); return Ext4.isEmpty(ret) ?
null : JSON.stringify(ret); @@ -113,7 +122,8 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { Ext4.Array.forEach(val, function(row){ var rec = grid.store.createModel({ genomeId: row[0], - targetAssay: row[1] + targetAssay: row[1], + maxAmbiguityAllowed: row[2] }); grid.store.add(rec); }, this); diff --git a/singlecell/resources/web/singlecell/panel/PoolImportPanel.js b/singlecell/resources/web/singlecell/panel/PoolImportPanel.js index b2f56edfc..8cf871dda 100644 --- a/singlecell/resources/web/singlecell/panel/PoolImportPanel.js +++ b/singlecell/resources/web/singlecell/panel/PoolImportPanel.js @@ -299,7 +299,7 @@ Ext4.define('SingleCell.panel.PoolImportPanel', { }, hto: function(val, panel){ - if (val === 'N/A' || val === 'NA') { + if (val === 'N/A' || val === 'NA' || val === 'N') { return null; } diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index 2675311c2..6aab5bcdc 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -31,6 +31,7 @@ import org.labkey.api.security.User; import org.labkey.api.sequenceanalysis.SequenceOutputFile; import org.labkey.api.sequenceanalysis.model.Readset; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker; import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; @@ -635,7 +636,7 @@ public File generateHashingCallsForRawMatrix(Readset parentReadset, PipelineOutp if (!doneFile.exists()) { - callsFile = generateCellHashingCalls(rawCountMatrixDir, ctx.getOutputDir(), outputBasename, ctx.getLogger(), ctx.getSourceDirectory(), parameters); + callsFile = generateCellHashingCalls(rawCountMatrixDir, ctx.getOutputDir(), outputBasename, ctx.getLogger(), ctx.getSourceDirectory(), parameters, ctx); try { @@ -1195,7 +1196,7 @@ private File getMolInfoFileFromCounts(File citeSeqCountOutDir) return new File(citeSeqCountOutDir.getParentFile(), "molecule_info.h5"); } - public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, String basename, Logger log, File localPipelineDir, CellHashingService.CellHashingParameters parameters) throws PipelineJobException + public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, String basename, Logger log, File localPipelineDir, CellHashingService.CellHashingParameters parameters, PipelineContext ctx) throws PipelineJobException { log.debug("generating final calls from folder: " + citeSeqCountOutDir.getPath()); @@ -1325,6 +1326,7 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St writer.println("\t-e CELLHASHR_DEBUG=1 \\"); writer.println("\t-v \"${WD}:/work\" \\"); + ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); writer.println("\t-v \"${HOME}:/homeDir\" \\"); writer.println("\t-u $UID \\"); writer.println("\t-e USERID=$UID \\"); diff --git a/singlecell/src/org/labkey/singlecell/SingleCellModule.java b/singlecell/src/org/labkey/singlecell/SingleCellModule.java index 4ce393e0f..7749acfd1 100644 --- a/singlecell/src/org/labkey/singlecell/SingleCellModule.java +++ b/singlecell/src/org/labkey/singlecell/SingleCellModule.java @@ -109,6 +109,7 @@ import org.labkey.singlecell.run.CellRangerVDJWrapper; import org.labkey.singlecell.run.NimbleAlignmentStep; import 
org.labkey.singlecell.run.NimbleAnalysis; +import org.labkey.singlecell.run.RepeatNimbleReportHandler; import org.labkey.singlecell.run.VelocytoAlignmentStep; import org.labkey.singlecell.run.VelocytoAnalysisStep; @@ -221,6 +222,7 @@ public static void registerPipelineSteps() SequenceAnalysisService.get().registerFileHandler(new CellRangerRawDataHandler()); SequenceAnalysisService.get().registerFileHandler(new ProcessSingleCellHandler()); SequenceAnalysisService.get().registerFileHandler(new ProcessSeuratObjectHandler()); + SequenceAnalysisService.get().registerFileHandler(new RepeatNimbleReportHandler()); //Single-cell: SequencePipelineService.get().registerPipelineStep(new AppendCiteSeq.Provider()); diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java index bdfe7cb04..b023676bb 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java @@ -32,10 +32,10 @@ public Provider() {{ put("allowBlank", false); }}, null), - SeuratToolParameter.create("retainAmbiguousFeatures", "Retain Ambiguous Features", "If checked, features hitting more than one reference will be retained", "checkbox", new JSONObject() + SeuratToolParameter.create("maxAmbiguityAllowed", "Max Ambiguity Allowed", "If provided, ambiguous features with more than this number of values will be discarded (e.g. if maxAmbiguityAllowed=2, then the feature Feat1,Feat2,Feat3 would be discarded, but not Feat1,Feat3). This can be overridden per genome.", "ldk-integerfield", new JSONObject() {{ - put("check", false); - }}, false, null, true), + put("minValue", 0); + }}, 0, null, true), SeuratToolParameter.create("ensureSamplesShareAllGenomes", "Ensure Samples Share All Genomes", "If checked, the job will fail unless nimble data is found for each requested genome for all samples", "checkbox", new JSONObject() {{ put("check", true); @@ -76,7 +76,7 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List args = new ArrayList<>(); - args.add(getWrapper().getExe().getPath()); + args.add(getWrapper().getExe(false).getPath()); args.add("mkvdjref"); args.add("--seqs=" + getGenomeFasta().getPath()); args.add("--genome=" + indexDir.getName()); @@ -301,8 +302,10 @@ public AlignmentStep.AlignmentOutput performAlignment(Readset rs, List inp { AlignmentOutputImpl output = new AlignmentOutputImpl(); + boolean useCellRanger7 = getProvider().getParameterByName("useCellRanger7").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false); + List args = new ArrayList<>(); - args.add(getWrapper().getExe().getPath()); + args.add(getWrapper().getExe(useCellRanger7).getPath()); args.add("multi"); args.add("--disable-ui"); @@ -830,9 +833,9 @@ private String getSampleName(String fn) } } - protected File getExe() + protected File getExe(boolean useCellRanger7) { - return SequencePipelineService.get().getExeForPackage("CELLRANGERPATH", "cellranger"); + return SequencePipelineService.get().getExeForPackage("CELLRANGERPATH", useCellRanger7 ?
"cellranger7": "cellranger"); } private static void processAndMergeCSVs(File abCSV, File gdCSV, Logger log) throws PipelineJobException @@ -977,7 +980,7 @@ else if (uniqueChains.size() == 2) { try { - String ret = executeWithOutput(Arrays.asList(getExe().getPath(), "--version")); + String ret = executeWithOutput(Arrays.asList(getExe(false).getPath(), "--version")); return ret.replaceAll("^cellranger ", ""); } diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index 891748ca0..3c2268aa4 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -53,6 +53,8 @@ public class NimbleHelper private final PipelineStepProvider _provider; private final int _stepIdx; + public static final String NIMBLE_REPORT_CATEGORY = "Nimble Report"; + public NimbleHelper(PipelineContext ctx, PipelineStepProvider provider, int stepIdx) { _ctx = ctx; @@ -288,16 +290,19 @@ public void doNimbleAlign(File bam, PipelineStepOutput output, Readset rs, Strin description += "\nscore_percent: " + genome.getScorePercent(); } - output.addSequenceOutput(results, basename + ": nimble align", "Nimble Alignment", rs.getRowId(), null, genome.getGenomeId(), description); + output.addSequenceOutput(results, basename + ": nimble align", "Nimble Results", rs.getRowId(), null, genome.getGenomeId(), description); - File outputBam = new File(results.getPath().replaceAll("results." + genome.genomeId + ".txt.gz", "nimbleAlignment." + genome.genomeId + ".bam")); - if (outputBam.exists()) + File reportHtml = getReportHtmlFileFromResults(results); + if (!reportHtml.exists()) { - output.addSequenceOutput(outputBam, basename + ": nimble align", "Nimble Alignment", rs.getRowId(), null, genome.getGenomeId(), description); + if (SequencePipelineService.get().hasMinLineCount(results, 2)) + { + throw new PipelineJobException("Unable to find file: " + reportHtml.getPath()); + } } else { - getPipelineCtx().getLogger().debug("BAM not found: " + outputBam.getPath()); + output.addSequenceOutput(reportHtml, basename + ": nimble report", NIMBLE_REPORT_CATEGORY, rs.getRowId(), null, genome.getGenomeId(), description); } } } @@ -361,7 +366,7 @@ else if ("strict".equals(alignTemplate)) config.put("num_mismatches", 0); config.put("intersect_level", 0); // NOTE: this allows a small amount of mismatched ends: - config.put("score_percent", 0.90); + config.put("score_percent", 0.99); config.put("score_threshold", 45); config.put("score_filter", 25); } @@ -471,38 +476,82 @@ private Map doAlignment(List genomes, List reportArgs = new ArrayList<>(); - reportArgs.add("python3"); - reportArgs.add("-m"); - reportArgs.add("nimble"); + File reportResultsGz = runNimbleReport(alignResultsGz, genome.genomeId, output, getPipelineCtx()); + resultMap.put(genome, reportResultsGz); + } + + return resultMap; + } + + public static File runNimbleReport(File alignResultsGz, int genomeId, PipelineStepOutput output, PipelineContext ctx) throws PipelineJobException + { + List reportArgs = new ArrayList<>(); + reportArgs.add("python3"); + reportArgs.add("-m"); + reportArgs.add("nimble"); + + reportArgs.add("report"); + reportArgs.add("-i"); + reportArgs.add("/work/" + alignResultsGz.getName()); + + File reportResultsGz = new File(ctx.getWorkingDirectory(), "reportResults." 
+ genomeId + ".txt"); + if (reportResultsGz.exists()) + { + reportResultsGz.delete(); + } + + reportArgs.add("-o"); + reportArgs.add("/work/" + reportResultsGz.getName()); + + runUsingDocker(reportArgs, output, null, ctx); + + if (!reportResultsGz.exists()) + { + throw new PipelineJobException("Missing file: " + reportResultsGz.getPath()); + } - reportArgs.add("report"); - reportArgs.add("-i"); - reportArgs.add("/work/" + alignResultsGz.getName()); + if (SequencePipelineService.get().hasMinLineCount(alignResultsGz, 2)) + { + // Also run nimble plot. Always re-run since this is fast: + List plotArgs = new ArrayList<>(); + plotArgs.add("python3"); + plotArgs.add("-m"); + plotArgs.add("nimble"); + + plotArgs.add("plot"); + plotArgs.add("--input_file"); + plotArgs.add("/work/" + alignResultsGz.getName()); - File reportResultsGz = new File(getPipelineCtx().getWorkingDirectory(), "reportResults." + genome.genomeId + ".txt"); - if (reportResultsGz.exists()) + File plotResultsHtml = getReportHtmlFileFromResults(reportResultsGz); + if (plotResultsHtml.exists()) { - reportResultsGz.delete(); + plotResultsHtml.delete(); } - reportArgs.add("-o"); - reportArgs.add("/work/" + reportResultsGz.getName()); + plotArgs.add("--output_file"); + plotArgs.add("/work/" + plotResultsHtml.getName()); - runUsingDocker(reportArgs, output, null); + runUsingDocker(plotArgs, output, null, ctx); - if (!reportResultsGz.exists()) + if (!plotResultsHtml.exists()) { - throw new PipelineJobException("Missing file: " + reportResultsGz.getPath()); + throw new PipelineJobException("Missing file: " + plotResultsHtml.getPath()); } - - resultMap.put(genome, reportResultsGz); + } + else + { + ctx.getLogger().info("Only single line found in results, skipping nimble plot"); } - return resultMap; + return reportResultsGz; + } + + public static File getReportHtmlFileFromResults(File reportResults) + { + return new File(reportResults.getPath().replaceAll("txt(.gz)*$", "html")); } - private File getNimbleDoneFile(File parentDir, String resumeString) + private static File getNimbleDoneFile(File parentDir, String resumeString) { return new File(parentDir, "nimble." 
+ resumeString + ".done"); } @@ -511,13 +560,18 @@ private File getNimbleDoneFile(File parentDir, String resumeString) private boolean runUsingDocker(List nimbleArgs, PipelineStepOutput output, @Nullable String resumeString) throws PipelineJobException { - File localBashScript = new File(getPipelineCtx().getWorkingDirectory(), "docker.sh"); - File dockerBashScript = new File(getPipelineCtx().getWorkingDirectory(), "dockerRun.sh"); + return runUsingDocker(nimbleArgs, output, resumeString, getPipelineCtx()); + } + + private static boolean runUsingDocker(List nimbleArgs, PipelineStepOutput output, @Nullable String resumeString, PipelineContext ctx) throws PipelineJobException + { + File localBashScript = new File(ctx.getWorkingDirectory(), "docker.sh"); + File dockerBashScript = new File(ctx.getWorkingDirectory(), "dockerRun.sh"); output.addIntermediateFile(localBashScript); output.addIntermediateFile(dockerBashScript); // Create temp folder: - File tmpDir = new File(getPipelineCtx().getWorkingDirectory(), "tmpDir"); + File tmpDir = new File(ctx.getWorkingDirectory(), "tmpDir"); if (tmpDir.exists()) { try @@ -551,6 +605,7 @@ private boolean runUsingDocker(List nimbleArgs, PipelineStepOutput outpu writer.println("\t--memory='" + maxRam + "g' \\"); } + ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\")); writer.println("\t-v \"${WD}:/work\" \\"); writer.println("\t-v \"${HOME}:/homeDir\" \\"); writer.println("\t-u $UID \\"); @@ -581,22 +636,22 @@ private boolean runUsingDocker(List nimbleArgs, PipelineStepOutput outpu File doneFile = null; if (resumeString != null) { - doneFile = getNimbleDoneFile(getPipelineCtx().getWorkingDirectory(), resumeString); + doneFile = getNimbleDoneFile(ctx.getWorkingDirectory(), resumeString); output.addIntermediateFile(doneFile); if (doneFile.exists()) { - getPipelineCtx().getLogger().info("Nimble already completed, resuming: " + resumeString); + ctx.getLogger().info("Nimble already completed, resuming: " + resumeString); return false; } else { - getPipelineCtx().getLogger().debug("done file not found: " + doneFile.getPath()); + ctx.getLogger().debug("done file not found: " + doneFile.getPath()); } } - SimpleScriptWrapper rWrapper = new SimpleScriptWrapper(getPipelineCtx().getLogger()); - rWrapper.setWorkingDir(getPipelineCtx().getWorkingDirectory()); + SimpleScriptWrapper rWrapper = new SimpleScriptWrapper(ctx.getLogger()); + rWrapper.setWorkingDir(ctx.getWorkingDirectory()); rWrapper.execute(Arrays.asList("/bin/bash", localBashScript.getName())); if (doneFile != null) @@ -615,18 +670,23 @@ private boolean runUsingDocker(List nimbleArgs, PipelineStepOutput outpu } private File ensureLocalCopy(File input, PipelineStepOutput output) throws PipelineJobException + { + return ensureLocalCopy(input, output, getPipelineCtx()); + } + + public static File ensureLocalCopy(File input, PipelineStepOutput output, PipelineContext ctx) throws PipelineJobException { try { - if (getPipelineCtx().getWorkingDirectory().equals(input.getParentFile())) + if (ctx.getWorkingDirectory().equals(input.getParentFile())) { return input; } - File local = new File(getPipelineCtx().getWorkingDirectory(), input.getName()); + File local = new File(ctx.getWorkingDirectory(), input.getName()); if (!local.exists()) { - getPipelineCtx().getLogger().debug("Copying file locally: " + input.getPath()); + ctx.getLogger().debug("Copying file locally: " + input.getPath()); FileUtils.copyFile(input, local); } @@ -699,9 +759,7 @@ public Integer getNumMismatches() private String 
getVersion(PipelineStepOutput output) throws PipelineJobException { List nimbleArgs = new ArrayList<>(); - nimbleArgs.add("/bin/bash"); - nimbleArgs.add("-c"); - nimbleArgs.add("python3 -m nimble -v > /work/nimbleVersion.txt"); + nimbleArgs.add("/bin/bash -c 'python3 -m nimble -v' > /work/nimbleVersion.txt"); runUsingDocker(nimbleArgs, output, null); @@ -711,10 +769,15 @@ private String getVersion(PipelineStepOutput output) throws PipelineJobException throw new PipelineJobException("Unable to find file: " + outFile.getPath()); } - String ret; - try (BufferedReader reader = Readers.getReader(outFile)) + String ret = null; + try { - ret = reader.readLine(); + ret = StringUtils.trimToNull(Files.readString(outFile.toPath())); + if (ret == null) + { + throw new PipelineJobException("nimble -v did not output version"); + } + ret = ret.replaceAll("nimble", "").replaceAll("[\\r\\n]+", ""); } catch (IOException e) { diff --git a/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java new file mode 100644 index 000000000..9228cd71e --- /dev/null +++ b/singlecell/src/org/labkey/singlecell/run/RepeatNimbleReportHandler.java @@ -0,0 +1,208 @@ +package org.labkey.singlecell.run; + +import org.apache.commons.io.FileUtils; +import org.json.JSONObject; +import org.labkey.api.collections.CaseInsensitiveHashMap; +import org.labkey.api.data.SimpleFilter; +import org.labkey.api.data.TableInfo; +import org.labkey.api.data.TableSelector; +import org.labkey.api.exp.api.DataType; +import org.labkey.api.exp.api.ExpData; +import org.labkey.api.exp.api.ExperimentService; +import org.labkey.api.module.ModuleLoader; +import org.labkey.api.pipeline.PipelineJob; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.pipeline.RecordedAction; +import org.labkey.api.query.BatchValidationException; +import org.labkey.api.query.DuplicateKeyException; +import org.labkey.api.query.FieldKey; +import org.labkey.api.query.QueryService; +import org.labkey.api.query.QueryUpdateServiceException; +import org.labkey.api.sequenceanalysis.SequenceOutputFile; +import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.DefaultPipelineStepOutput; +import org.labkey.api.sequenceanalysis.pipeline.PipelineStepOutput; +import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; +import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.util.FileType; +import org.labkey.singlecell.SingleCellModule; +import org.labkey.singlecell.SingleCellSchema; + +import java.io.File; +import java.io.IOException; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +public class RepeatNimbleReportHandler extends AbstractParameterizedOutputHandler +{ + public RepeatNimbleReportHandler() + { + super(ModuleLoader.getInstance().getModule(SingleCellModule.class), "Re-run Nimble Report", "This will re-run nimble report and nimble plot for the selected run and replace the original files in-place.", null, Arrays.asList( + ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. 
This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{ + put("checked", true); + }}, true) + ) + ); + } + + private static final FileType _nimbleResultsGz = new FileType(".txt", FileType.gzSupportLevel.SUPPORT_GZ); + + @Override + public boolean canProcess(SequenceOutputFile o) + { + return o.getFile() != null && o.getFile().exists() && o.getFile().getName().startsWith("reportResults.") && _nimbleResultsGz.isType(o.getFile()); + } + + @Override + public boolean useWorkbooks() + { + return true; + } + + @Override + public boolean doSplitJobs() + { + return true; + } + + @Override + public boolean doRunRemote() + { + return true; + } + + @Override + public boolean doRunLocal() + { + return false; + } + + @Override + public SequenceOutputProcessor getProcessor() + { + return new Processor(); + } + + private static class Processor implements SequenceOutputProcessor + { + @Override + public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List inputFiles, JSONObject params, File outputDir, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + + } + + private File getAlignmentResults(File reportResults) + { + return new File(reportResults.getParentFile(), reportResults.getName().replaceAll("reportResults", "alignResults") + ".gz"); + } + + @Override + public void processFilesRemote(List inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException + { + PipelineStepOutput output = new DefaultPipelineStepOutput(); + + for (SequenceOutputFile so : inputFiles) + { + // so.getFile() is the prior report results; locate the alignment results that produced it: + File alignmentFile = getAlignmentResults(so.getFile()); + if (!alignmentFile.exists()) + { + throw new PipelineJobException("Unable to find file: " + alignmentFile.getPath()); + } + + // This will update these files in-place: + File alignmentFileLocal = NimbleHelper.ensureLocalCopy(alignmentFile, output, ctx); + File reportFile = NimbleHelper.runNimbleReport(alignmentFileLocal, so.getLibrary_id(), output, ctx); + if (!reportFile.exists()) + { + throw new PipelineJobException("Unable to find file: " + reportFile.getPath()); + } + + File htmlFile = NimbleHelper.getReportHtmlFileFromResults(reportFile); + if (!htmlFile.exists()) + { + throw new PipelineJobException("Unable to find file: " + htmlFile.getPath()); + } + + // Replace the originals: + try + { + File targetHtml = new File(so.getFile().getParentFile(), htmlFile.getName()); + if (targetHtml.exists()) + { + targetHtml.delete(); + } + FileUtils.moveFile(htmlFile, targetHtml); + + File targetReport = new File(so.getFile().getParentFile(), reportFile.getName()); + if (targetReport.exists()) + { + targetReport.delete(); + } + else + { + ctx.getLogger().error("Expected report file to exist: " + targetReport.getPath()); + } + FileUtils.moveFile(reportFile, targetReport); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + ctx.getFileManager().addIntermediateFiles(output.getIntermediateFiles()); + } + + @Override + public void complete(JobContext ctx, List inputs, List outputsCreated) throws PipelineJobException + { + // Because the plot output was added later, re-create this if it doesn't exist: + for (SequenceOutputFile so : inputs) + { + File plotFile = NimbleHelper.getReportHtmlFileFromResults(so.getFile()); + + TableInfo ti = QueryService.get().getUserSchema(ctx.getJob().getUser(), so.getContainerObj(),
SingleCellSchema.SEQUENCE_SCHEMA_NAME).getTable("outputfiles"); + SimpleFilter filter = new SimpleFilter(FieldKey.fromString("category"), NimbleHelper.NIMBLE_REPORT_CATEGORY).addCondition(FieldKey.fromString("dataid/dataFileUrl"), plotFile.toURI().toString()); + TableSelector ts = new TableSelector(ti, filter, null); + if (!ts.exists()) + { + ExpData expData = ExperimentService.get().getExpDataByURL(plotFile, so.getContainerObj()); + if (expData == null) + { + expData = ExperimentService.get().createData(so.getContainerObj(), new DataType("Nimble Results")); + expData.setDataFileURI(plotFile.toURI()); + expData.setName(plotFile.getName()); + expData.save(ctx.getJob().getUser()); + } + + Map toInsert = new CaseInsensitiveHashMap<>(); + toInsert.put("name", so.getName().replaceAll("nimble results", "nimble report")); + toInsert.put("category", NimbleHelper.NIMBLE_REPORT_CATEGORY); + toInsert.put("description", so.getDescription()); + toInsert.put("dataid", expData.getRowId()); + toInsert.put("library_id", so.getLibrary_id()); + toInsert.put("runid", so.getRunId()); + toInsert.put("analysis_id", so.getAnalysis_id()); + + try + { + ti.getUpdateService().insertRows(ctx.getJob().getUser(), so.getContainerObj(), Collections.singletonList(toInsert), new BatchValidationException(), null, null); + } + catch (SQLException | BatchValidationException | QueryUpdateServiceException | DuplicateKeyException e) + { + throw new PipelineJobException(e); + } + } + else + { + ctx.getLogger().debug("Plot file output exists, will not re-create"); + } + } + } + } +}
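
Note on the getDockerVolumes() contract introduced in this patch: every call site appends the returned strings verbatim into the generated bash script via writer.println(ln + " \\"), alongside lines such as writer.println("\t-v \"${WD}:/work\" \\"). The implication is that each returned entry must already be a complete, tab-indented docker "-v" fragment, without the trailing continuation backslash. The sketch below is illustrative only: the class name DockerVolumeFormatExample, the formatVolumes helper, and the mount paths are hypothetical and not part of this patch; it simply demonstrates the string format the script writers above expect.

import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;

public class DockerVolumeFormatExample
{
    // Hypothetical helper: turns host/container path pairs into the line format the
    // generated docker scripts consume, i.e. a tab-indented -v fragment with quoted paths.
    public static Collection<String> formatVolumes(List<String[]> pairs)
    {
        return pairs.stream()
                .map(p -> "\t-v \"" + p[0] + ":" + p[1] + "\"")
                .collect(Collectors.toList());
    }

    public static void main(String[] args)
    {
        // Placeholder mounts for illustration only:
        Collection<String> volumes = formatVolumes(Arrays.asList(
                new String[]{"/data/shared", "/data/shared"},
                new String[]{"/refGenomes", "/refGenomes"}
        ));

        // Mirrors the writer.println(ln + " \\") pattern used by the callers above,
        // so each volume line ends with a backslash continuation in the final script:
        StringBuilder script = new StringBuilder();
        script.append("sudo $DOCKER run --rm=true \\\n");
        script.append("\t-v \"${WD}:/work\" \\\n");
        volumes.forEach(ln -> script.append(ln).append(" \\\n"));
        script.append("\t-w /work \\\n");
        script.append("\tghcr.io/example/container");
        System.out.println(script);
    }
}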