diff --git a/AGAT/0.8.1/ConvertGffToGtf.nf b/AGAT/0.8.1/ConvertGffToGtf.nf new file mode 100755 index 00000000..5882cdcb --- /dev/null +++ b/AGAT/0.8.1/ConvertGffToGtf.nf @@ -0,0 +1,18 @@ +process ConvertGffToGtf { + tag {"AGAT ConvertGffToGtf ${gff}"} + label 'AGAT_0_8_1' + label 'AGAT_0_8_1_ConvertGffToGtf' + container = 'quay.io/biocontainers/agat:0.8.1--pl5262hdfd78af_0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + path(gff) + + output: + path("${gff.simpleName}.gtf", emit: gtf) + + script: + """ + agat_convert_sp_gff2gtf.pl -gff ${gff} -o ${gff.simpleName}.gtf + """ +} diff --git a/BCFtools/1.10.2/View.nf b/BCFtools/1.10.2/View.nf index ad978a9f..7567f66d 100644 --- a/BCFtools/1.10.2/View.nf +++ b/BCFtools/1.10.2/View.nf @@ -7,13 +7,13 @@ process View_bcf_vcf { shell = ['/bin/bash', '-euo', 'pipefail'] input: - tuple(val(sample_id), file(bcf_file)) + tuple(val(sample_id), file(bcf_file)) output: - tuple(val(sample_id), file("${bcf_file.baseName}.vcf")) + tuple(val(sample_id), file("${bcf_file.baseName}.vcf")) script: - """ - bcftools view ${params.optional} ${bcf_file} > ${bcf_file.baseName}.vcf - """ + """ + bcftools view ${params.optional} ${bcf_file} > ${bcf_file.baseName}.vcf + """ } diff --git a/BCFtools/1.17/View.nf b/BCFtools/1.17/View.nf new file mode 100755 index 00000000..7b2d9781 --- /dev/null +++ b/BCFtools/1.17/View.nf @@ -0,0 +1,20 @@ +process View_bcf_vcf { + // BCFtools view can use different input and output files, this process converts bcf to vcf files. + tag {"BCFtools View_BCF_VCF ${sample_id}"} + label 'BCFtools_1_17' + label 'BCFtools_1_17_View_BCF_VCF' + container = 'quay.io/biocontainers/bcftools:1.17--h3cc50cf_1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), file(input_file)) + + output: + tuple(val(sample_id), file("${input_file.baseName}.${extension}")) + + script: + extension = input_file.getExtension() == "vcf" ? 
"bcf" : "vcf" + """ + bcftools view ${params.optional} ${input_file} > ${input_file.baseName}.${extension} + """ +} diff --git a/BWA-Mem2/bwa-mem2_samtools-1.12_2.2.1/BWASW.nf b/BWA-Mem2/bwa-mem2_samtools-1.12_2.2.1/BWASW.nf new file mode 100755 index 00000000..b5e71f1d --- /dev/null +++ b/BWA-Mem2/bwa-mem2_samtools-1.12_2.2.1/BWASW.nf @@ -0,0 +1,18 @@ +process BWASW { + tag {"BWA_MEM2 BWASW ${sample_id} - ${rg_id}"} + label 'BWA_MEM2_2_2_1' + label 'BWA_MEM2_2_2_1_BWASW' + container = 'blcdsdockerregistry/bwa-mem2_samtools-1.12:2.2.1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), val(rg_id), path(fastq)) + + output: + tuple(val(sample_id), val(rg_id), path("${fastq[0].simpleName}.sam"), emit: sam_file) // rg_id is forwarded unchanged from the input tuple + + script: // NOTE(review): upstream bwa-mem2 documents only index/mem/version - confirm this image supports 'bwasw' + """ + bwa-mem2 bwasw -t ${task.cpus} ${params.optional} ${params.genome} ${fastq} > ${fastq[0].simpleName}.sam + """ +} diff --git a/BWA-Mem2/bwa-mem2_samtools-1.12_2.2.1/Index.nf b/BWA-Mem2/bwa-mem2_samtools-1.12_2.2.1/Index.nf new file mode 100755 index 00000000..308b2f97 --- /dev/null +++ b/BWA-Mem2/bwa-mem2_samtools-1.12_2.2.1/Index.nf @@ -0,0 +1,22 @@ +index_loc = file("${params.genome_fasta}").toRealPath().toString().split("/")[0..-2].join("/") + +process Index { + tag {"BWA_MEM2 Index $fasta"} + label 'BWA_MEM2_2_2_1' + label 'BWA_MEM2_2_2_1_Index' + container = 'library://blcdsdockerregistry/bwa-mem2_samtools-1.12:2.2.1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + storeDir = index_loc + + input: + path(fasta) + + output: + path("${fasta}.{alt,amb,ann,bwt,pac,sa}", emit: bwa_index) + + script: + """ + bwa-mem2 index $params.optional $fasta + """ +} diff --git a/BWA-Mem2/bwa-mem2_samtools-1.12_2.2.1/MEM.nf b/BWA-Mem2/bwa-mem2_samtools-1.12_2.2.1/MEM.nf new file mode 100755 index 00000000..17de4983 --- /dev/null +++ b/BWA-Mem2/bwa-mem2_samtools-1.12_2.2.1/MEM.nf @@ -0,0 +1,21 @@ +process MEM { + tag {"BWA_MEM2 MEM ${sample_id} - ${rg_id}"} + label 'BWA_MEM2_2_2_1' + label 'BWA_MEM2_2_2_1_MEM' + container = 
'quay.io/blcdsdockerregistry/bwa-mem2_samtools-1.12:2.2.1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), val(rg_id), path(fastq)) + + output: + tuple(val(sample_id), val(rg_id), path("${fastq[0].simpleName}.sam"), emit: sam_file) + + script: + def barcode = rg_id.split('_')[1] + def readgroup = "\"@RG\\tID:${rg_id}\\tSM:${sample_id}\\tPL:ILLUMINA\\tLB:${sample_id}\\tPU:${barcode}\"" + + """ + bwa-mem2 mem -t ${task.cpus} -R ${readgroup} ${params.optional} ${params.genome} ${fastq} > ${fastq[0].simpleName}.sam + """ +} diff --git a/BWA-Mem2/bwa-mem2_samtools-1.12_2.2.1/Mapping.nf b/BWA-Mem2/bwa-mem2_samtools-1.12_2.2.1/Mapping.nf new file mode 100755 index 00000000..6922fc70 --- /dev/null +++ b/BWA-Mem2/bwa-mem2_samtools-1.12_2.2.1/Mapping.nf @@ -0,0 +1,23 @@ +process BWAMapping { + tag {"BWA_MEM2 Mem ${sample_id} - ${rg_id}"} + label 'BWA_MEM2_2_2_1' + label 'BWA_MEM2_2_2_1_Mem' + container = 'library://blcdsdockerregistry/bwa-mem2_samtools-1.12:2.2.1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), val(rg_id), path(fastq)) + + output: + tuple(val(sample_id), val(rg_id), path("${rg_id}_sorted.bam"), path("${rg_id}_sorted.bai"), emit: mapped_bams) + + script: + def barcode = rg_id.split('_')[1] + def bwa_readgroup = "\"@RG\\tID:${rg_id}\\tSM:${sample_id}\\tPL:ILLUMINA\\tLB:${sample_id}\\tPU:${barcode}\"" + + """ + bwa-mem2 mem $params.optional -t ${task.cpus} -R $bwa_readgroup $params.genome_fasta $fastq | \ + samtools sort > ${rg_id}_sorted.bam + samtools index ${rg_id}_sorted.bam ${rg_id}_sorted.bai + """ +} diff --git a/ControlFREEC/11.6/AssessSignificance.nf b/ControlFREEC/11.6/AssessSignificance.nf new file mode 100755 index 00000000..c3c76e8c --- /dev/null +++ b/ControlFREEC/11.6/AssessSignificance.nf @@ -0,0 +1,18 @@ +process AssessSignificance { + tag {"Control Freec AssessSignificance ${sample_id}"} + label 'ControlFreec_11_6' + label 'ControlFreec_11_6_AssessSignificance' + container = 
'quay.io/biocontainers/control-freec:11.6--h87f3376_2' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(ratio_file), path(cnv_file)) + + output: + tuple(val(sample_id), path("${cnv_file.name}.p.value.txt"), emit: cnv_pvalue) + + script: + """ + cat /usr/local/bin/assess_significance.R | R --slave --args ${cnv_file} ${ratio_file} + """ +} diff --git a/ControlFREEC/11.6/Freec.nf b/ControlFREEC/11.6/Freec.nf new file mode 100755 index 00000000..c58b568a --- /dev/null +++ b/ControlFREEC/11.6/Freec.nf @@ -0,0 +1,35 @@ +process Freec { + tag {"Control Freec ${sample_id}"} + label 'ControlFreec_11_6' + label 'ControlFreec_11_6_Freec' + container = 'quay.io/biocontainers/control-freec:11.6--h87f3376_2' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(bam_file), path(bai_file)) + + output: + tuple(val(sample_id), path("${bam_file.name}_ratio.txt"), path("${bam_file.name}_CNVs"), emit: cnv) + tuple(val(sample_id), path("${bam_file.name}_sample.cpn"), path("${bam_file.name}_ratio.BedGraph"), path("${bam_file.name}_info.txt"), emit: other) + + script: + def config = "${sample_id}.config" + """ + touch ${config} + echo "[general]" >> ${config} + echo "chrLenFile = ${params.chr_len_file}" >> ${config} + echo "chrFiles = ${params.chr_files}" >> ${config} + echo "gemMappabilityFile = ${params.gem_mappability_file}" >> ${config} + echo "ploidy = ${params.ploidy}" >> ${config} + echo "window = ${params.window}" >> ${config} + echo "telocentromeric = ${params.telocentromeric}" >> ${config} + echo "BedGraphOutput=TRUE" >> ${config} + echo "maxThreads=${task.cpus}" >> ${config} + + echo "[sample]" >> ${config} + echo "inputFormat = BAM" >> ${config} + echo "mateFile = ${bam_file}" >> ${config} + + freec -conf ${config} + """ +} diff --git a/ControlFREEC/11.6/MakeGraph.nf b/ControlFREEC/11.6/MakeGraph.nf new file mode 100755 index 00000000..80d1686a --- /dev/null +++ b/ControlFREEC/11.6/MakeGraph.nf @@ -0,0 +1,18 @@ 
+process MakeGraph { + tag {"Control Freec MakeGraph ${sample_id}"} + label 'ControlFreec_11_6' + label 'ControlFreec_11_6_MakeGraph' + container = 'quay.io/biocontainers/control-freec:11.6--h87f3376_2' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(ratio_file), path(cnv_file)) + + output: + tuple(val(sample_id), path("${ratio_file.name}.png"), path("${ratio_file.name}.log2.png"), emit: ratio_png) + + script: + """ + cat /usr/local/bin/makeGraph.R | R --slave --args ${params.ploidy} ${ratio_file} + """ +} diff --git a/ControlFREEC/11.6/MakeGraphChromosome.nf b/ControlFREEC/11.6/MakeGraphChromosome.nf new file mode 100755 index 00000000..695bfb9f --- /dev/null +++ b/ControlFREEC/11.6/MakeGraphChromosome.nf @@ -0,0 +1,18 @@ +process MakeGraphChromosome { + tag {"Control Freec MakeGraphChromosome ${sample_id}"} + label 'ControlFreec_11_6' + label 'ControlFreec_11_6_MakeGraphChromosome' + container = 'quay.io/biocontainers/control-freec:11.6--h87f3376_2' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(ratio_file), path(cnv_file)) + + output: + tuple(val(sample_id), path("${ratio_file.name}*.png"), emit: ratio_png) + + script: + """ + cat /usr/local/bin/makeGraph_Chromosome.R | R --slave --args 1 ${params.ploidy} ${ratio_file} + """ +} diff --git a/ControlFREEC/11.6/MakeKaryotype.nf b/ControlFREEC/11.6/MakeKaryotype.nf new file mode 100755 index 00000000..1f8f1cc4 --- /dev/null +++ b/ControlFREEC/11.6/MakeKaryotype.nf @@ -0,0 +1,18 @@ +process MakeKaryotype { + tag {"Control Freec MakeKaryotype ${sample_id}"} + label 'ControlFreec_11_6' + label 'ControlFreec_11_6_MakeKaryotype' + container = 'quay.io/biocontainers/control-freec:11.6--h87f3376_2' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(ratio_file), path(cnv_file)) + + output: + tuple(val(sample_id), path("*_karyotype.pdf"), emit: karyotype_pdf) + + script: // NOTE(review): assumes makeKaryotype.R is installed in /usr/local/bin like the other FREEC R scripts - confirm + """ + cat /usr/local/bin/makeKaryotype.R | R --slave --args 
${params.ploidy} ${params.maxlevel} ${params.telocentromeric} ${ratio_file} + """ +} diff --git a/DESeq2/1.28.0/deseq2Normalize.nf b/DESeq2/1.28.0/deseq2Normalize.nf new file mode 100755 index 00000000..af9fe112 --- /dev/null +++ b/DESeq2/1.28.0/deseq2Normalize.nf @@ -0,0 +1,20 @@ +process Deseq2Normalize { + tag "deseq2normalize ${run_id}" + label 'biconductor_1_28_0' + label 'biconductor_1_28_0_deseq2normalize' + container = 'quay.io/biocontainers/bioconductor-deseq2:1.28.0--r40h5f743cb_0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + val(run_id) + file(counts) + + output: + file("${run_id}_featureCounts_deseq2.txt") + + script: + """ + deseq2Normalize.R ${counts} ${run_id} + """ + +} diff --git a/DESeq2/1.38.0/deseq2Normalize.nf b/DESeq2/1.38.0/deseq2Normalize.nf new file mode 100644 index 00000000..6309a857 --- /dev/null +++ b/DESeq2/1.38.0/deseq2Normalize.nf @@ -0,0 +1,19 @@ +process Deseq2Normalize { + tag "deseq2normalize ${run_id}" + label 'biconductor_1_38_0' + label 'biconductor_1_38_0_deseq2normalize' + container = 'quay.io/biocontainers/bioconductor-deseq2:1.38.0--r42hc247a5b_0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + val(run_id) + file(counts) + + output: + file("${run_id}_featureCounts_deseq2.txt") + + script: + """ + deseq2Normalize.R ${counts} ${run_id} + """ +} diff --git a/Deeplexicon/1.2.0/dmux.nf b/Deeplexicon/1.2.0/dmux.nf new file mode 100644 index 00000000..3459ed26 --- /dev/null +++ b/Deeplexicon/1.2.0/dmux.nf @@ -0,0 +1,25 @@ +process dmux { + tag {"Deeplexicon_dmux"} + label 'Deeplexicon_1_2_0' + label 'Deeplexicon_1_2_0_dmux' + container = 'lpryszcz/deeplexicon:1.2.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample), val(chunk), path(fast5)) + + output: + tuple(val(sample), val(chunk), path("${sample}_${chunk}.demux2.tsv"), emit:tsv) + + script: + """ + #deeplexicon only works when folder is passed, not when fast5 itself is used so put file in folder for further processing + mkdir 
fast5_folder + cp -P ${fast5} fast5_folder/ + deeplexicon_multi.py dmux \ + --threads 2 \ + -p ./fast5_folder/ \ + -m /deeplexicon/models/resnet20-final.h5 \ + > ${sample}_${chunk}.demux2.tsv + """ +} diff --git a/Deeplexicon/1.2.0/fastq.nf b/Deeplexicon/1.2.0/fastq.nf new file mode 100644 index 00000000..db7cfff2 --- /dev/null +++ b/Deeplexicon/1.2.0/fastq.nf @@ -0,0 +1,18 @@ +process deeplexicon_concatFastq { + tag {"Deeplexicon_concat_fastq"} + label 'Deeplexicon_1_2_0' + label 'Deeplexicon_1_2_0_concat_fastq' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(id), val(chunk), path(fastq)) + + output: + tuple(val(id), path("${id}_combined.fastq"), emit:fastq) + + script: + """ + cat *.fastq.gz > ${id}_combined.fastq.gz + gunzip ${id}_combined.fastq.gz + """ +} \ No newline at end of file diff --git a/Deeplexicon/1.2.0/split.nf b/Deeplexicon/1.2.0/split.nf new file mode 100644 index 00000000..45b11ea3 --- /dev/null +++ b/Deeplexicon/1.2.0/split.nf @@ -0,0 +1,25 @@ +process split { + tag {"Deeplexicon_split"} + label 'Deeplexicon_1_2_0' + label 'Deeplexicon_1_2_0_split' + container = 'lpryszcz/deeplexicon:1.2.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(id), path(tsv),path(fastq)) + + output: + path("*.fastq.gz") + + script: + """ + deeplexicon_multi.py split \ + -i ${tsv} \ + -o . 
\ + -s ${id} \ + -q ${fastq} + + #deeplexicon outputs plain fastq, so gzip them + gzip ${id}_bc_*.fastq + """ +} \ No newline at end of file diff --git a/Deeplexicon/1.2.0/tsv.nf b/Deeplexicon/1.2.0/tsv.nf new file mode 100644 index 00000000..a8cb7771 --- /dev/null +++ b/Deeplexicon/1.2.0/tsv.nf @@ -0,0 +1,45 @@ +process deeplexicon_copyTsv { + tag {"deeplexicon_copyTsv"} + label 'deeplexicon_copyTsv' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample), val(chunk), path(tsv)) + + output: + tuple(val(sample), val(chunk), path("${sample}_${chunk}_copy_${tsv}"), emit:tsv) + + script: + """ + echo \"Copy tsv: ${tsv}\" + cp ${tsv} ${sample}_${chunk}_copy_${tsv} + """ +} + +process deeplexicon_concatTsv { + tag {"Deeplexicon_concat_tsv"} + label 'Deeplexicon_1_2_0' + label 'Deeplexicon_1_2_0_concat_tsv' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(id), val(chunk), path(tsv)) + + output: + tuple(val(id), path("${id}_combined.tsv"), emit:tsv) + + script: + """ + print_header=1 + echo \"Test concatTsv sample: ${id}\" + for tsv_chunk in ./*.tsv; do + #print header + if [[ \${print_header} -eq 1 ]]; then + head -n1 \${tsv_chunk} > ${id}_combined.tsv + print_header=0 + fi + #print content + tail -n+2 \${tsv_chunk} >> ${id}_combined.tsv + done + """ +} \ No newline at end of file diff --git a/FastQC/0.11.9/FastQC.nf b/FastQC/0.11.9/FastQC.nf new file mode 100755 index 00000000..e7b907ad --- /dev/null +++ b/FastQC/0.11.9/FastQC.nf @@ -0,0 +1,17 @@ +process FastQC { + tag {"FastQC ${sample_id} - ${rg_id}"} + label 'FASTQC_0_11_9' + container = 'quay.io/biocontainers/fastqc:0.11.9--hdfd78af_1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), val(rg_id), path(fastq)) + + output: + path("*_fastqc.{zip,html}", emit: report) + + script: + """ + fastqc ${params.optional} -t ${task.cpus} ${fastq} + """ +} diff --git a/GATK/4.1.3.0/BaseRecalibration.nf b/GATK/4.1.3.0/BaseRecalibration.nf index de2eed90..a220e77c 
100644 --- a/GATK/4.1.3.0/BaseRecalibration.nf +++ b/GATK/4.1.3.0/BaseRecalibration.nf @@ -5,6 +5,7 @@ process BaseRecalibration { clusterOptions = workflow.profile == "sge" ? "-l h_vmem=${params.mem}" : "" container = 'library://sawibo/default/bioinf-tools:gatk4.1.3.0' shell = ['/bin/bash', '-euo', 'pipefail'] + input: tuple(val(sample_id), file(bam), path(bai),path(recal_table), path(interval_file)) @@ -14,8 +15,7 @@ process BaseRecalibration { script: int_tag = interval_file.toRealPath().toString().split("/")[-2] """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" \ - ApplyBQSR \ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" ApplyBQSR \ --input $bam \ --output ${sample_id}.${int_tag}_recalibrated.bam \ -R ${params.genome_fasta} \ diff --git a/GATK/4.1.3.0/BaseRecalibrationTable.nf b/GATK/4.1.3.0/BaseRecalibrationTable.nf index c57451c9..e076a6a0 100644 --- a/GATK/4.1.3.0/BaseRecalibrationTable.nf +++ b/GATK/4.1.3.0/BaseRecalibrationTable.nf @@ -5,6 +5,7 @@ process BaseRecalibrationTable { clusterOptions = workflow.profile == "sge" ? "-l h_vmem=${params.mem}" : "" container = 'library://sawibo/default/bioinf-tools:gatk4.1.3.0' shell = ['/bin/bash', '-euo', 'pipefail'] + input: tuple(val(sample_id), path(bam), path(bai), path(interval_file)) @@ -14,10 +15,9 @@ process BaseRecalibrationTable { script: known = params.genome_known_sites ? 
'--known-sites ' + params.genome_known_sites.join(' --known-sites ') : '' int_tag = interval_file.toRealPath().toString().split("/")[-2] - """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" \ - BaseRecalibrator \ + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" BaseRecalibrator \ --input $bam \ --output ${sample_id}.${int_tag}.recal.table \ -R ${params.genome_fasta} \ diff --git a/GATK/4.1.3.0/CollectMultipleMetrics.nf b/GATK/4.1.3.0/CollectMultipleMetrics.nf index e8f04398..7edb3eb8 100644 --- a/GATK/4.1.3.0/CollectMultipleMetrics.nf +++ b/GATK/4.1.3.0/CollectMultipleMetrics.nf @@ -1,24 +1,24 @@ process CollectMultipleMetrics { - tag {"GATK CollectMultipleMetrics ${sample_id}"} - label 'GATK_4_1_3_0' - label 'GATK_4_1_3_0_CollectMultipleMetrics' - clusterOptions = workflow.profile == "sge" ? "-l h_vmem=${params.mem}" : "" - container = 'library://sawibo/default/bioinf-tools:gatk4.1.3.0' - shell = ['/bin/bash', '-euo', 'pipefail'] - input: - tuple(val(sample_id), path(bam)) + tag {"GATK CollectMultipleMetrics ${sample_id}"} + label 'GATK_4_1_3_0' + label 'GATK_4_1_3_0_CollectMultipleMetrics' + clusterOptions = workflow.profile == "sge" ? 
"-l h_vmem=${params.mem}" : "" + container = 'library://sawibo/default/bioinf-tools:gatk4.1.3.0' + shell = ['/bin/bash', '-euo', 'pipefail'] - output: - path ("${sample_id}.multiple_metrics*", emit : multiple_metrics) + input: + tuple(val(sample_id), path(bam)) - script: - """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" \ - CollectMultipleMetrics \ - -I $bam \ - -O ${sample_id}.multiple_metrics\ - -R ${params.genome_fasta} \ - --TMP_DIR \$TMPDIR \ - ${params.optional} - """ + output: + path("${sample_id}.multiple_metrics*", emit : multiple_metrics) + + script: + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" CollectMultipleMetrics \ + -I $bam \ + -O ${sample_id}.multiple_metrics\ + -R ${params.genome_fasta} \ + --TMP_DIR \$TMPDIR \ + ${params.optional} + """ } diff --git a/GATK/4.1.3.0/CollectWGSMetrics.nf b/GATK/4.1.3.0/CollectWGSMetrics.nf index c05f7686..d17970e2 100644 --- a/GATK/4.1.3.0/CollectWGSMetrics.nf +++ b/GATK/4.1.3.0/CollectWGSMetrics.nf @@ -1,25 +1,26 @@ process CollectWGSMetrics { - tag {"GATK CollectWGSMetrics ${sample_id}"} - label 'GATK_4_1_3_0' - label 'GATK_4_1_3_0_CollectWGSMetrics' - clusterOptions = workflow.profile == "sge" ? "-l h_vmem=${params.mem}" : "" - container = 'library://sawibo/default/bioinf-tools:gatk4.1.3.0' - shell = ['/bin/bash', '-euo', 'pipefail'] - input: - tuple(val(sample_id), path(bam)) + tag {"GATK CollectWGSMetrics ${sample_id}"} + label 'GATK_4_1_3_0' + label 'GATK_4_1_3_0_CollectWGSMetrics' + clusterOptions = workflow.profile == "sge" ? 
"-l h_vmem=${params.mem}" : "" + container = 'library://sawibo/default/bioinf-tools:gatk4.1.3.0' + shell = ['/bin/bash', '-euo', 'pipefail'] - output: - path ("${sample_id}.wgs_metrics.txt" , emit: wgs_metrics) + input: + tuple(val(sample_id), path(bam)) - script: - """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" \ - CollectWgsMetrics \ - -I $bam \ - -O ${sample_id}.wgs_metrics.txt \ - -R ${params.genome_fasta} \ - --TMP_DIR \$TMPDIR \ - ${params.optional} - sed -i 's/picard\\.analysis\\.WgsMetrics/picard\\.analysis\\.CollectWgsMetrics\\\$WgsMetrics/' ${sample_id}.wgs_metrics.txt + output: + path("${sample_id}.wgs_metrics.txt" , emit: wgs_metrics) + + script: + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" CollectWgsMetrics \ + -I $bam \ + -O ${sample_id}.wgs_metrics.txt \ + -R ${params.genome_fasta} \ + --TMP_DIR \$TMPDIR \ + ${params.optional} + + sed -i 's/picard\\.analysis\\.WgsMetrics/picard\\.analysis\\.CollectWgsMetrics\\\$WgsMetrics/' ${sample_id}.wgs_metrics.txt """ } diff --git a/GATK/4.1.3.0/CombineGVCFs.nf b/GATK/4.1.3.0/CombineGVCFs.nf index ce11cc24..c3cb524b 100644 --- a/GATK/4.1.3.0/CombineGVCFs.nf +++ b/GATK/4.1.3.0/CombineGVCFs.nf @@ -5,8 +5,10 @@ process CombineGVCFs { clusterOptions = workflow.profile == "sge" ? 
"-l h_vmem=${params.mem}" : "" container = 'library://sawibo/default/bioinf-tools:gatk4.1.3.0' shell = ['/bin/bash', '-euo', 'pipefail'] + input: tuple(val(run_id), val(interval), path(gvcf_chunks), path(gvcf_chunk_idxs), path(interval_file)) + output: tuple(val(run_id), val(interval), path("${run_id}.${interval}.g.vcf"), path("${run_id}.${interval}.g.vcf.idx"), path(interval_file), emit: combined_gvcfs) @@ -14,8 +16,7 @@ process CombineGVCFs { vcfs = gvcf_chunks.join(' -V ') """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" \ - CombineGVCFs \ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" CombineGVCFs \ -R ${params.genome_fasta} \ -V $vcfs \ -O ${run_id}.${interval}.g.vcf \ diff --git a/GATK/4.1.3.0/GatherBaseRecalibrationTables.nf b/GATK/4.1.3.0/GatherBaseRecalibrationTables.nf index 2995cbab..f539b559 100644 --- a/GATK/4.1.3.0/GatherBaseRecalibrationTables.nf +++ b/GATK/4.1.3.0/GatherBaseRecalibrationTables.nf @@ -5,6 +5,7 @@ process GatherBaseRecalibrationTables { clusterOptions = workflow.profile == "sge" ? "-l h_vmem=${params.mem}" : "" container = 'library://sawibo/default/bioinf-tools:gatk4.1.3.0' shell = ['/bin/bash', '-euo', 'pipefail'] + input: tuple(val(sample_id), path(bqsr_tables)) @@ -13,9 +14,9 @@ process GatherBaseRecalibrationTables { script: tables = bqsr_tables.join(' -I ') + """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" \ - GatherBQSRReports \ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" GatherBQSRReports \ -I $tables \ --output ${sample_id}.recal.table \ --tmp-dir \$TMPDIR diff --git a/GATK/4.1.3.0/GenotypeGVCFs.nf b/GATK/4.1.3.0/GenotypeGVCFs.nf index 1ae4b152..20b87cba 100644 --- a/GATK/4.1.3.0/GenotypeGVCFs.nf +++ b/GATK/4.1.3.0/GenotypeGVCFs.nf @@ -5,6 +5,7 @@ process GenotypeGVCFs { clusterOptions = workflow.profile == "sge" ? 
"-l h_vmem=${params.mem}" : "" container = 'library://sawibo/default/bioinf-tools:gatk4.1.3.0' shell = ['/bin/bash', '-euo', 'pipefail'] + input: tuple(val(run_id), val(interval), path(gvcf), path(gvcfidx), path(interval_file)) @@ -13,8 +14,7 @@ process GenotypeGVCFs { script: """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" \ - GenotypeGVCFs \ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" GenotypeGVCFs \ -V $gvcf \ -O ${run_id}.${interval}.vcf \ -R ${params.genome_fasta} \ diff --git a/GATK/4.1.3.0/HaplotypeCaller.nf b/GATK/4.1.3.0/HaplotypeCaller.nf index 710bbffc..71c23387 100644 --- a/GATK/4.1.3.0/HaplotypeCaller.nf +++ b/GATK/4.1.3.0/HaplotypeCaller.nf @@ -5,6 +5,7 @@ process HaplotypeCaller { clusterOptions = workflow.profile == "sge" ? "-l h_vmem=${params.mem}" : "" container = 'library://sawibo/default/bioinf-tools:gatk4.1.3.0' shell = ['/bin/bash', '-euo', 'pipefail'] + input: tuple(val(sample_id), path(bam), path(bai), path(interval_file)) @@ -14,9 +15,9 @@ process HaplotypeCaller { script: int_tag = interval_file.toRealPath().toString().split("/")[-2] ext = params.optional =~ /GVCF/ ? 
'.g.vcf' : '.vcf' + """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" \ - HaplotypeCaller \ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" HaplotypeCaller \ ${params.optional} \ -I $bam \ --output ${sample_id}.${int_tag}${ext} \ diff --git a/GATK/4.1.3.0/MergeVCFs.nf b/GATK/4.1.3.0/MergeVCFs.nf index 80f07b1a..c30c87b9 100644 --- a/GATK/4.1.3.0/MergeVCFs.nf +++ b/GATK/4.1.3.0/MergeVCFs.nf @@ -17,8 +17,7 @@ process MergeVCFs { vcfs = vcf_chunks.join(' -INPUT ') """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" \ - SortVcf \ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" SortVcf \ --INPUT $vcfs \ --OUTPUT ${id}${ext} \ --TMP_DIR \$TMPDIR diff --git a/GATK/4.1.3.0/SamToFastq.nf b/GATK/4.1.3.0/SamToFastq.nf index f5c85dea..60451fc7 100644 --- a/GATK/4.1.3.0/SamToFastq.nf +++ b/GATK/4.1.3.0/SamToFastq.nf @@ -2,7 +2,6 @@ process SamToFastq { tag {"GATK SamToFastq ${sample_id} "} label 'GATK_4_1_3_0' label 'GATK_4_1_3_0_SamToFastq' - clusterOptions = workflow.profile == "sge" ? "-l h_vmem=${params.mem}" : "" container = 'library://sawibo/default/bioinf-tools:gatk4.1.3.0' shell = ['/bin/bash', '-euo', 'pipefail'] @@ -15,8 +14,7 @@ process SamToFastq { script: """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" \ - SamToFastq \ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" SamToFastq \ ${params.optional} \ --INPUT $bam \ --FASTQ ${sample_id}_${flowcell}_R1_001.fastq.gz \ diff --git a/GATK/4.1.3.0/SelectVariants.nf b/GATK/4.1.3.0/SelectVariants.nf index 837f3189..0f02a529 100644 --- a/GATK/4.1.3.0/SelectVariants.nf +++ b/GATK/4.1.3.0/SelectVariants.nf @@ -5,6 +5,7 @@ process SelectVariants { clusterOptions = workflow.profile == "sge" ? 
"-l h_vmem=${params.mem}" : "" container = 'library://sawibo/default/bioinf-tools:gatk4.1.3.0' shell = ['/bin/bash', '-euo', 'pipefail'] + input: tuple(val(run_id), val(interval), path(vcf), path(vcfidx), val(type)) @@ -13,9 +14,9 @@ process SelectVariants { script: select_type = type == 'SNP' ? '--select-type SNP --select-type NO_VARIATION' : '--select-type INDEL --select-type MIXED' + """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" \ - SelectVariants \ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" SelectVariants \ -R ${params.genome_fasta} \ -V $vcf \ -O ${run_id}.${interval}.${type}.tmp.vcf \ diff --git a/GATK/4.1.3.0/SplitIntervals.nf b/GATK/4.1.3.0/SplitIntervals.nf index f78006bf..e7e3a7e9 100644 --- a/GATK/4.1.3.0/SplitIntervals.nf +++ b/GATK/4.1.3.0/SplitIntervals.nf @@ -4,6 +4,7 @@ process SplitIntervals { label 'GATK_4_1_3_0_SplitIntervals' container = 'library://sawibo/default/bioinf-tools:gatk4.1.3.0' shell = ['/bin/bash', '-euo', 'pipefail'] + input: val(mode) path(scatter_interval_list) @@ -15,8 +16,7 @@ process SplitIntervals { break_bands_at_multiples_of = mode == 'break' ? 
1000000 : 0 """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\${TMPDIR}" \ - IntervalListTools \ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\${TMPDIR}" IntervalListTools \ -I ${scatter_interval_list} \ ${params.optional} \ --BREAK_BANDS_AT_MULTIPLES_OF $break_bands_at_multiples_of \ diff --git a/GATK/4.1.3.0/SplitNCigarReads.nf b/GATK/4.1.3.0/SplitNCigarReads.nf index 97a69221..2b1f4dc1 100644 --- a/GATK/4.1.3.0/SplitNCigarReads.nf +++ b/GATK/4.1.3.0/SplitNCigarReads.nf @@ -1,10 +1,10 @@ - process SplitNCigarReads { tag {"GATK SplitNCigarReads ${sample_id}"} label 'GATK_4_1_3_0' label 'GATK_4_1_3_0_SplitNCigarReads' container = 'library://sawibo/default/bioinf-tools:gatk4.1.3.0' shell = ['/bin/bash', '-euo', 'pipefail'] + input: tuple(val(sample_id), path(bam_file), path(bai_file)) @@ -13,8 +13,7 @@ process SplitNCigarReads { script: """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" \ - SplitNCigarReads \ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" SplitNCigarReads \ --tmp-dir \$TMPDIR \ -R ${params.genome_fasta} \ -I ${bam_file} \ diff --git a/GATK/4.1.3.0/VariantAnnotator.nf b/GATK/4.1.3.0/VariantAnnotator.nf index cc61939d..e429b3e8 100644 --- a/GATK/4.1.3.0/VariantAnnotator.nf +++ b/GATK/4.1.3.0/VariantAnnotator.nf @@ -5,6 +5,7 @@ process VariantAnnotator { clusterOptions = workflow.profile == "sge" ? 
"-l h_vmem=${params.mem}" : "" container = 'library://sawibo/default/bioinf-tools:gatk4.1.3.0' shell = ['/bin/bash', '-euo', 'pipefail'] + input: tuple(val(run_id), path(vcf), path(vcfidx)) @@ -16,8 +17,7 @@ process VariantAnnotator { db_name = db_file.replaceFirst(~/\.[^\.]+$/, '') """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" \ - VariantAnnotator \ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" VariantAnnotator \ -R ${params.genome_fasta} \ -V $vcf \ --output ${vcf.baseName}_${db_name}.vcf \ diff --git a/GATK/4.1.3.0/VariantFiltration.nf b/GATK/4.1.3.0/VariantFiltration.nf index eb78bb71..64af5863 100644 --- a/GATK/4.1.3.0/VariantFiltration.nf +++ b/GATK/4.1.3.0/VariantFiltration.nf @@ -5,6 +5,7 @@ process VariantFiltration { clusterOptions = workflow.profile == "sge" ? "-l h_vmem=${params.mem}" : "" container = 'library://sawibo/default/bioinf-tools:gatk4.1.3.0' shell = ['/bin/bash', '-euo', 'pipefail'] + input: tuple(val(run_id), val(interval), val(type), path(vcf), path(vcfidx)) @@ -19,9 +20,9 @@ process VariantFiltration { } else { filter_criteria = "--filter-expression 'QD < 2.0' --filter-expression 'ReadPosRankSum < -20.0' --filter-expression 'FS > 200.0' --filter-name 'INDEL_LowQualityDepth' --filter-name 'INDEL_ReadPosRankSumLow' --filter-name 'INDEL_StrandBias'" } + """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" \ - VariantFiltration \ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" VariantFiltration \ ${params.optional} \ -R $params.genome_fasta \ -V $vcf \ diff --git a/GATK/4.3.0.0/BaseRecalibration.nf b/GATK/4.3.0.0/BaseRecalibration.nf new file mode 100755 index 00000000..16aea546 --- /dev/null +++ b/GATK/4.3.0.0/BaseRecalibration.nf @@ -0,0 +1,33 @@ +process BaseRecalibration { + tag {"GATK BaseRecalibration ${sample_id}.${int_tag}"} + label 'GATK_4_3_0_0' + label 'GATK_4_3_0_0_BaseRecalibration' + container 
= 'broadinstitute/gatk:4.3.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), file(bam), path(bai),path(recal_table), path(interval_file)) + + output: + tuple( + val(sample_id), + val(int_tag), + path("${sample_id}.${int_tag}_recalibrated.bam"), + path("${sample_id}.${int_tag}_recalibrated.bai"), + path(interval_file), + emit: recalibrated_bams + ) + + script: + int_tag = interval_file.toRealPath().toString().split("/")[-2] + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" ApplyBQSR \ + --tmp-dir \$TMPDIR \ + --input $bam \ + --output ${sample_id}.${int_tag}_recalibrated.bam \ + -R ${params.genome_fasta} \ + --create-output-bam-index true \ + --bqsr-recal-file ${recal_table} \ + -L $interval_file + """ +} diff --git a/GATK/4.3.0.0/BaseRecalibrationTable.nf b/GATK/4.3.0.0/BaseRecalibrationTable.nf new file mode 100755 index 00000000..a870d41f --- /dev/null +++ b/GATK/4.3.0.0/BaseRecalibrationTable.nf @@ -0,0 +1,27 @@ +process BaseRecalibrationTable { + tag {"GATK BaseRecalibrationTable ${sample_id}.${int_tag}"} + label 'GATK_4_3_0_0' + label 'GATK_4_3_0_0_BaseRecalibrationTable' + container = 'broadinstitute/gatk:4.3.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(bam), path(bai), path(interval_file)) + + output: + tuple(val(sample_id), path("${sample_id}.${int_tag}.recal.table"), emit: recalibration_tables) + + script: + known = params.genome_known_sites ? 
'--known-sites ' + params.genome_known_sites.join(' --known-sites ') : '' + int_tag = interval_file.toRealPath().toString().split("/")[-2] + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" BaseRecalibrator \ + --tmp-dir \$TMPDIR \ + --input $bam \ + --output ${sample_id}.${int_tag}.recal.table \ + -R ${params.genome_fasta} \ + $known \ + -L $interval_file + """ +} + diff --git a/GATK/4.3.0.0/CollectMultipleMetrics.nf b/GATK/4.3.0.0/CollectMultipleMetrics.nf new file mode 100755 index 00000000..ee311c50 --- /dev/null +++ b/GATK/4.3.0.0/CollectMultipleMetrics.nf @@ -0,0 +1,23 @@ +process CollectMultipleMetrics { + tag {"GATK CollectMultipleMetrics ${sample_id}"} + label 'GATK_4_3_0_0' + label 'GATK_4_3_0_0_CollectMultipleMetrics' + container = 'broadinstitute/gatk:4.3.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(bam)) + + output: + path("${sample_id}.multiple_metrics*", emit : multiple_metrics) + + script: + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" CollectMultipleMetrics \ + --TMP_DIR \$TMPDIR \ + -I $bam \ + -O ${sample_id}.multiple_metrics\ + -R ${params.genome_fasta} \ + ${params.optional} + """ +} diff --git a/GATK/4.3.0.0/CollectWGSMetrics.nf b/GATK/4.3.0.0/CollectWGSMetrics.nf new file mode 100755 index 00000000..8363a103 --- /dev/null +++ b/GATK/4.3.0.0/CollectWGSMetrics.nf @@ -0,0 +1,25 @@ +process CollectWGSMetrics { + tag {"GATK CollectWGSMetrics ${sample_id}"} + label 'GATK_4_3_0_0' + label 'GATK_4_3_0_0_CollectWGSMetrics' + container = 'broadinstitute/gatk:4.3.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(bam)) + + output: + path("${sample_id}.wgs_metrics.txt" , emit: wgs_metrics) + + script: + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" CollectWgsMetrics \ + --TMP_DIR \$TMPDIR \ + -I $bam \ + -O ${sample_id}.wgs_metrics.txt \ + -R 
${params.genome_fasta} \ + ${params.optional} + + sed -i 's/picard\\.analysis\\.WgsMetrics/picard\\.analysis\\.CollectWgsMetrics\\\$WgsMetrics/' ${sample_id}.wgs_metrics.txt + """ +} diff --git a/GATK/4.3.0.0/CombineGVCFs.nf b/GATK/4.3.0.0/CombineGVCFs.nf new file mode 100755 index 00000000..046842ee --- /dev/null +++ b/GATK/4.3.0.0/CombineGVCFs.nf @@ -0,0 +1,34 @@ +process CombineGVCFs { + tag {"GATK CombineGVCFs ${run_id}.${interval}"} + label 'GATK_4_3_0_0' + label 'GATK_4_3_0_0_CombineGVCFs' + container = 'broadinstitute/gatk:4.3.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(run_id), val(interval), path(gvcf_chunks), path(gvcf_chunk_idxs), path(interval_file)) + + output: + tuple( + val(run_id), + val(interval), + path("${run_id}.${interval}.g.${ext_vcf}"), + path("${run_id}.${interval}.g.${ext_index}"), + path(interval_file), + emit: combined_gvcfs + ) + + script: + vcfs = gvcf_chunks.join(' -V ') + ext_vcf = params.compress || vcfs.endsWith(".gz") ? "vcf.gz" : "vcf" /* test the joined chunk names: a collection of chunks has no getExtension() */ + ext_index = params.compress || vcfs.endsWith(".gz") ? 
"vcf.gz.tbi" : "vcf.idx" + + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" CombineGVCFs \ + --tmp-dir \$TMPDIR \ + -R ${params.genome_fasta} \ + -V $vcfs \ + -O ${run_id}.${interval}.g.${ext_vcf} \ + -L $interval_file + """ +} diff --git a/GATK/4.3.0.0/GatherBaseRecalibrationTables.nf b/GATK/4.3.0.0/GatherBaseRecalibrationTables.nf new file mode 100755 index 00000000..f814eed0 --- /dev/null +++ b/GATK/4.3.0.0/GatherBaseRecalibrationTables.nf @@ -0,0 +1,21 @@ +process GatherBaseRecalibrationTables { + tag {"GATK GatherBaseRecalibrationTables ${sample_id}"} + label 'GATK_4_3_0_0' + label 'GATK_4_3_0_0_GatherBaseRecalibrationTables' + container = 'broadinstitute/gatk:4.3.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(bqsr_tables)) + + output: + tuple(val(sample_id), path("${sample_id}.recal.table"), emit : gathered_recalibration_tables) + + script: + tables = bqsr_tables.join(' -I ') + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" GatherBQSRReports \ + -I $tables \ + --output ${sample_id}.recal.table + """ +} diff --git a/GATK/4.3.0.0/GenotypeGVCFs.nf b/GATK/4.3.0.0/GenotypeGVCFs.nf new file mode 100755 index 00000000..2149eb3e --- /dev/null +++ b/GATK/4.3.0.0/GenotypeGVCFs.nf @@ -0,0 +1,36 @@ +process GenotypeGVCFs { + tag {"GATK GenotypeGVCFs ${run_id}.${interval}"} + label 'GATK_4_3_0_0' + label 'GATK_4_3_0_0_GenotypeGVCFs' + clusterOptions = workflow.profile == "sge" ? "-l h_vmem=${params.mem}" : "" + container = 'broadinstitute/gatk:4.3.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(run_id), val(interval), path(gvcf), path(gvcftbi), path(interval_file)) + + output: + tuple( + val(run_id), + val(interval), + path("${run_id}.${interval}.${ext_vcf}"), + path("${run_id}.${interval}.${ext_index}"), + path(interval_file), + emit : genotyped_vcfs + ) + + script: + ext_vcf = params.compress || gvcf.getExtension() == "gz" ? 
"vcf.gz" : "vcf" + ext_index = params.compress || gvcf.getExtension() == "gz" ? "vcf.gz.tbi" : "vcf.idx" + db = params.genome_dbsnp ? "-D ${params.genome_dbsnp}" : "" + + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" GenotypeGVCFs \ + --tmp-dir \$TMPDIR \ + -V $gvcf \ + -O ${run_id}.${interval}.${ext_vcf} \ + -R ${params.genome_fasta} \ + ${db} \ + -L $interval_file + """ +} diff --git a/GATK/4.3.0.0/HaplotypeCaller.nf b/GATK/4.3.0.0/HaplotypeCaller.nf new file mode 100755 index 00000000..f3f8a555 --- /dev/null +++ b/GATK/4.3.0.0/HaplotypeCaller.nf @@ -0,0 +1,37 @@ +process HaplotypeCaller { + tag {"GATK HaplotypeCaller ${sample_id}.${int_tag}"} + label 'GATK_4_3_0_0' + label 'GATK_4_3_0_0_HaplotypeCaller' + container = 'broadinstitute/gatk:4.3.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(bam), path(bai), path(interval_file)) + + output: + tuple( + val(sample_id), + val(int_tag), + path("${sample_id}.${int_tag}.${ext_vcf}"), + path("${sample_id}.${int_tag}.${ext_index}"), + path(interval_file), + emit: htcaller_vcfs + ) + + script: + int_tag = interval_file.toRealPath().toString().split("/")[-2] + ext = params.optional =~ /GVCF/ ? 'g.vcf' : 'vcf' + + ext_vcf = params.compress ? "${ext}.gz" : "${ext}" + ext_index = params.compress ? 
"${ext}.gz.tbi" : "${ext}.idx" + + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" HaplotypeCaller \ + --tmp-dir \$TMPDIR \ + ${params.optional} \ + -I $bam \ + --output ${sample_id}.${int_tag}.${ext_vcf} \ + -R $params.genome_fasta \ + -L $interval_file + """ +} diff --git a/GATK/4.3.0.0/MergeVCFs.nf b/GATK/4.3.0.0/MergeVCFs.nf new file mode 100755 index 00000000..789d160c --- /dev/null +++ b/GATK/4.3.0.0/MergeVCFs.nf @@ -0,0 +1,26 @@ +process MergeVCFs { + tag {"GATK MergeVCFs ${id}"} + label 'GATK_4_3_0_0' + label 'GATK_4_3_0_0_MergeVCFs' + container = 'broadinstitute/gatk:4.3.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(id), path(vcf_chunks), path(vcfidxs)) + + output: + tuple(val(id), path("${id}${ext_g}.${ext_vcf}"), path("${id}${ext_g}.${ext_index}"), emit: merged_vcfs) + + script: + ext_g = vcf_chunks[0] =~ /\.g\.vcf/ ? '.g' : '' + vcfs = vcf_chunks.join(' -INPUT ') + ext_vcf = params.compress || vcf_chunks[0].getExtension() == "gz" ? "vcf.gz" : "vcf" + ext_index = params.compress || vcf_chunks[0].getExtension() == "gz" ? 
"vcf.gz.tbi" : "vcf.idx" + + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" SortVcf \ + --TMP_DIR \$TMPDIR \ + --INPUT $vcfs \ + --OUTPUT ${id}${ext_g}.${ext_vcf} + """ +} diff --git a/GATK/4.3.0.0/SamToFastq.nf b/GATK/4.3.0.0/SamToFastq.nf new file mode 100755 index 00000000..b6d54c22 --- /dev/null +++ b/GATK/4.3.0.0/SamToFastq.nf @@ -0,0 +1,24 @@ +process SamToFastq { + tag {"GATK SamToFastq ${sample_id} "} + label 'GATK_4_3_0_0' + label 'GATK_4_3_0_0_SamToFastq' + container = 'broadinstitute/gatk:4.3.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), val(flowcell), val(machine), val(run_nr), path(bam)) + + output: + tuple(val(sample_id), val(flowcell), val(machine), val(run_nr), path("*.fastq.gz"), emit: converted_fastqs) + + script: + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" SamToFastq \ + --TMP_DIR \$TMPDIR \ + ${params.optional} \ + --INPUT $bam \ + --FASTQ ${sample_id}_${flowcell}_R1_001.fastq.gz \ + --SECOND_END_FASTQ ${sample_id}_${flowcell}_R2_001.fastq.gz \ + --INCLUDE_NON_PF_READS true + """ +} diff --git a/GATK/4.3.0.0/SelectVariants.nf b/GATK/4.3.0.0/SelectVariants.nf new file mode 100755 index 00000000..8ff5d26e --- /dev/null +++ b/GATK/4.3.0.0/SelectVariants.nf @@ -0,0 +1,34 @@ +process SelectVariants { + tag {"GATK SelectVariants ${run_id}.${interval}.${type}"} + label 'GATK_4_3_0_0' + label 'GATK_4_3_0_0_SelectVariants' + container = 'broadinstitute/gatk:4.3.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(run_id), val(interval), path(vcf), path(vcftbi), val(type)) + + output: + tuple( + val(run_id), + val(interval), + val(type), + path("${run_id}.${interval}.${type}.tmp.${ext_vcf}"), + path("${run_id}.${interval}.${type}.tmp.${ext_index}"), + emit: selected_vcfs + ) + + script: + select_type = type == 'SNP' ? 
'--select-type SNP --select-type NO_VARIATION' : '--select-type INDEL --select-type MIXED' + ext_vcf = params.compress || vcf.getExtension() == "gz" ? "vcf.gz" : "vcf" /* output extension follows the input vcf of this process */ + ext_index = params.compress || vcf.getExtension() == "gz" ? "vcf.gz.tbi" : "vcf.idx" + + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" SelectVariants \ + --tmp-dir \$TMPDIR \ + -R ${params.genome_fasta} \ + -V $vcf \ + -O ${run_id}.${interval}.${type}.tmp.${ext_vcf} \ + $select_type + """ +} diff --git a/GATK/4.3.0.0/SplitIntervals.nf b/GATK/4.3.0.0/SplitIntervals.nf new file mode 100755 index 00000000..4c11bdf9 --- /dev/null +++ b/GATK/4.3.0.0/SplitIntervals.nf @@ -0,0 +1,25 @@ +process SplitIntervals { + tag {"GATK SplitIntervals"} + label 'GATK_4_3_0_0' + label 'GATK_4_3_0_0_SplitIntervals' + container = 'broadinstitute/gatk:4.3.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + val(mode) + path(scatter_interval_list) + + output: + path("temp_*/scattered.interval_list", emit: interval_lists) + + script: + break_bands_at_multiples_of = mode == 'break' ? 1000000 : 0 + + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\${TMPDIR}" IntervalListTools \ + -I ${scatter_interval_list} \ + ${params.optional} \ + --BREAK_BANDS_AT_MULTIPLES_OF $break_bands_at_multiples_of \ + -O . 
+ """ +} diff --git a/GATK/4.3.0.0/SplitNCigarReads.nf b/GATK/4.3.0.0/SplitNCigarReads.nf new file mode 100755 index 00000000..f0231763 --- /dev/null +++ b/GATK/4.3.0.0/SplitNCigarReads.nf @@ -0,0 +1,23 @@ +process SplitNCigarReads { + tag {"GATK SplitNCigarReads ${sample_id}"} + label 'GATK_4_3_0_0' + label 'GATK_4_3_0_0_SplitNCigarReads' + container = 'broadinstitute/gatk:4.3.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(bam_file), path(bai_file)) + + output: + tuple(val(sample_id), path("${sample_id}.split.bam"), path("${sample_id}.split.bai"), emit: bam_file) + + script: + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" SplitNCigarReads \ + --tmp-dir \$TMPDIR \ + -R ${params.genome_fasta} \ + -I ${bam_file} \ + --refactor-cigar-string \ + -O ${sample_id}.split.bam + """ +} diff --git a/GATK/4.3.0.0/VariantAnnotator.nf b/GATK/4.3.0.0/VariantAnnotator.nf new file mode 100755 index 00000000..37631b9e --- /dev/null +++ b/GATK/4.3.0.0/VariantAnnotator.nf @@ -0,0 +1,29 @@ +process VariantAnnotator { + tag {"GATK VariantAnnotator ${run_id}"} + label 'GATK_4_3_0_0' + label 'GATK_4_3_0_0_VariantAnnotator' + container = 'broadinstitute/gatk:4.3.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(run_id), path(vcf), path(vcfidx)) + + output: + tuple(val(run_id), path("${vcf.baseName}_${db_name}.${ext_vcf}"), path("${vcf.baseName}_${db_name}.${ext_index}"), emit: annotated_vcfs) + + script: + db_file = file(params.genome_variant_annotator_db).getBaseName() + db_name = db_file.replaceFirst(~/\.[^\.]+$/, '') + + ext_vcf = params.compress || vcf.getExtension() == "gz" ? "vcf.gz" : "vcf" + ext_index = params.compress || vcf.getExtension() == "gz" ? 
"vcf.gz.tbi" : "vcf.idx" + + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" VariantAnnotator \ + --tmp-dir \$TMPDIR \ + -R ${params.genome_fasta} \ + -V $vcf \ + --output ${vcf.baseName}_${db_name}.${ext_vcf} \ + --dbsnp ${params.genome_variant_annotator_db} \ + """ +} diff --git a/GATK/4.3.0.0/VariantFiltration.nf b/GATK/4.3.0.0/VariantFiltration.nf new file mode 100755 index 00000000..da1e1859 --- /dev/null +++ b/GATK/4.3.0.0/VariantFiltration.nf @@ -0,0 +1,41 @@ +process VariantFiltration { + tag {"GATK VariantFiltration ${run_id}.${interval}.${type}"} + label 'GATK_4_3_0_0' + label 'GATK_4_3_0_0_VariantFiltration' + container = 'broadinstitute/gatk:4.3.0.0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(run_id), val(interval), val(type), path(vcf), path(vcftbi)) + + output: + tuple( + val(run_id), + val(interval), + val(type), + path("${run_id}.${interval}.${type}.filtered_variants.${ext_vcf}"), + path("${run_id}.${interval}.${type}.filtered_variants.${ext_index}"), + emit: filtered_vcfs + ) + + script: + if (type == 'SNP'){ + filter_criteria = params.gatk_snp_filter + } else if (type == 'RNA') { + filter_criteria = params.gatk_rna_filter + } else { + filter_criteria = params.gatk_indel_filter + } + ext_vcf = params.compress || vcf.getExtension() == "gz" ? "vcf.gz" : "vcf" + ext_index = params.compress || vcf.getExtension() == "gz" ? 
"vcf.gz.tbi" : "vcf.idx" + + """ + gatk --java-options "-Xmx${task.memory.toGiga()-4}g -Djava.io.tmpdir=\$TMPDIR" VariantFiltration \ + --tmp-dir \$TMPDIR \ + ${params.optional} \ + -R $params.genome_fasta \ + -V $vcf \ + -O ${run_id}.${interval}.${type}.filtered_variants.${ext_vcf} \ + $filter_criteria + """ +} diff --git a/MultiQC/1.13/MultiQC.nf b/MultiQC/1.13/MultiQC.nf new file mode 100755 index 00000000..3732d90a --- /dev/null +++ b/MultiQC/1.13/MultiQC.nf @@ -0,0 +1,18 @@ +process MultiQC { + tag {"MultiQC"} + label 'MultiQC_1_13' + container = 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + val(analysis_id) + path(qc_files) + + output: + tuple(path("${analysis_id}_multiqc_report.html"), path("${analysis_id}_multiqc_report_data"), emit: report) + + script: + """ + multiqc ${params.optional} --title ${analysis_id} . + """ +} diff --git a/MultiQC/1.14/MultiQC.nf b/MultiQC/1.14/MultiQC.nf new file mode 100755 index 00000000..2ef8565b --- /dev/null +++ b/MultiQC/1.14/MultiQC.nf @@ -0,0 +1,18 @@ +process MultiQC { + tag {"MultiQC"} + label 'MultiQC_1_14' + container = 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + val(analysis_id) + path(qc_files) + + output: + tuple(path("${analysis_id}_multiqc_report.html"), path("${analysis_id}_multiqc_report_data"), emit: report) + + script: + """ + multiqc ${params.optional} --title ${analysis_id} . 
+ """ +} diff --git a/Pandocker/21.02/MarkdownToHtml.nf b/Pandocker/21.02/MarkdownToHtml.nf new file mode 100755 index 00000000..d0ff7a37 --- /dev/null +++ b/Pandocker/21.02/MarkdownToHtml.nf @@ -0,0 +1,24 @@ +process MarkdownToHtml { + tag {"Pandocker MarkdownToHtml"} + label 'Pandocker_21_02' + label 'Pandocker_21_02_MarkdownToHtml' + container = 'library://dalibo/pandocker:v21.02' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + path(md_file) + + output: + path("${md_file.baseName}.html") + + script: + """ + pandoc ${md_file} \ + --variable urlcolor=blue \ + --variable linkcolor=blue \ + -s \ + --toc \ + -f markdown \ + -o ${md_file.baseName}.html + """ +} diff --git a/Pandocker/21.02/MarkdownToPdf.nf b/Pandocker/21.02/MarkdownToPdf.nf index 275f6d78..dc0adb02 100644 --- a/Pandocker/21.02/MarkdownToPdf.nf +++ b/Pandocker/21.02/MarkdownToPdf.nf @@ -16,6 +16,7 @@ process MarkdownToPdf { """ pandoc ${md_file} \ --variable urlcolor=blue \ + --variable linkcolor=blue \ -s \ --toc \ -f markdown \ diff --git a/Picard/2.22.0/AddOrReplaceReadGroups.nf b/Picard/2.22.0/AddOrReplaceReadGroups.nf new file mode 100755 index 00000000..ba205b76 --- /dev/null +++ b/Picard/2.22.0/AddOrReplaceReadGroups.nf @@ -0,0 +1,31 @@ +process AddOrReplaceReadGroups { + tag {"PICARD AddOrReplaceReadGroups ${sample_id}"} + label 'PICARD_2_22_0' + label 'PICARD_2_22_0_AddOrReplaceReadGroups' + container = 'quay.io/biocontainers/picard:2.22.0--0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(bam_file), path(bai_file)) + + output: + tuple (val(sample_id), path("${sample_id}.RG.bam"), path("${sample_id}.RG.bai"), emit: readgroup_bams) + + script: + //check if custom settings have been specified, otherwise use defaults + """ + picard -Xmx${task.memory.toGiga()-4}G -Djava.io.tmpdir=\$TMPDIR AddOrReplaceReadGroups \ + TMP_DIR=\$TMPDIR \ + INPUT=${bam_file} \ + OUTPUT=${sample_id}.RG.bam \ + RGID=${sample_id} \ + RGLB=${sample_id} \ + RGPL=ILLUMINA \ + 
RGPU=XXXXYYY \ + RGSM=${sample_id} + + picard -Xmx${task.memory.toGiga()-4}G -Djava.io.tmpdir=\$TMPDIR BuildBamIndex \ + TMP_DIR=\$TMPDIR \ + INPUT=${sample_id}.RG.bam + """ +} diff --git a/Picard/2.27.5/AddOrReplaceReadGroups.nf b/Picard/2.27.5/AddOrReplaceReadGroups.nf new file mode 100755 index 00000000..b23c5068 --- /dev/null +++ b/Picard/2.27.5/AddOrReplaceReadGroups.nf @@ -0,0 +1,31 @@ +process AddOrReplaceReadGroups { + tag {"PICARD AddOrReplaceReadGroups ${sample_id}"} + label 'PICARD_2_27_5' + label 'PICARD_2_27_5_AddOrReplaceReadGroups' + container = 'quay.io/biocontainers/picard:2.27.5--hdfd78af_0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(bam_file), path(bai_file)) + + output: + tuple(val(sample_id), path("${sample_id}.RG.bam"), path("${sample_id}.RG.bai"), emit: readgroup_bams) + + script: + //check if custom settings have been specified, otherwise use defaults + """ + picard -Xmx${task.memory.toGiga()-4}G -Djava.io.tmpdir=\$TMPDIR AddOrReplaceReadGroups \ + TMP_DIR=\$TMPDIR \ + INPUT=${bam_file} \ + OUTPUT=${sample_id}.RG.bam \ + RGID=${sample_id} \ + RGLB=${sample_id} \ + RGPL=ILLUMINA \ + RGPU=XXXXYYY \ + RGSM=${sample_id} + + picard -Xmx${task.memory.toGiga()-4}G -Djava.io.tmpdir=\$TMPDIR BuildBamIndex \ + TMP_DIR=\$TMPDIR \ + INPUT=${sample_id}.RG.bam + """ +} diff --git a/Picard/2.27.5/CollectHsMetrics.nf b/Picard/2.27.5/CollectHsMetrics.nf new file mode 100755 index 00000000..4d985fcf --- /dev/null +++ b/Picard/2.27.5/CollectHsMetrics.nf @@ -0,0 +1,25 @@ +process CollectHsMetrics { + tag {"PICARD CollectHsMetrics ${sample_id}"} + label 'PICARD_2_27_5' + label 'PICARD_2_27_5_CollectHsMetrics' + container = 'quay.io/biocontainers/picard:2.27.5--hdfd78af_0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(bam_file), path(bai_file)) + + output: + path("${sample_id}.HsMetrics.txt", emit: txt_file) + + script: + """ + picard -Xmx${task.memory.toGiga()-4}G 
-Djava.io.tmpdir=\$TMPDIR CollectHsMetrics \ + TMP_DIR=\$TMPDIR \ + R=${params.genome} \ + INPUT=${bam_file} \ + OUTPUT=${sample_id}.HsMetrics.txt \ + BAIT_INTERVALS=${params.bait} \ + TARGET_INTERVALS=${params.target} \ + ${params.optional} + """ +} diff --git a/Picard/2.27.5/CollectMultipleMetrics.nf b/Picard/2.27.5/CollectMultipleMetrics.nf new file mode 100755 index 00000000..29f5ec66 --- /dev/null +++ b/Picard/2.27.5/CollectMultipleMetrics.nf @@ -0,0 +1,24 @@ +process CollectMultipleMetrics { + tag {"PICARD CollectMultipleMetrics ${sample_id}"} + label 'PICARD_2_27_5' + label 'PICARD_2_27_5_CollectMultipleMetrics' + container = 'quay.io/biocontainers/picard:2.27.5--hdfd78af_0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(bam_file), path(bai_file)) + + output: + path("*.txt", emit: txt_files) + + script: + """ + picard -Xmx${task.memory.toGiga()-4}G -Djava.io.tmpdir=\$TMPDIR CollectMultipleMetrics \ + TMP_DIR=\$TMPDIR \ + R=${params.genome} \ + INPUT=${bam_file} \ + OUTPUT=${sample_id} \ + EXT=.txt \ + ${params.optional} + """ +} diff --git a/Picard/2.27.5/CollectWgsMetrics.nf b/Picard/2.27.5/CollectWgsMetrics.nf new file mode 100755 index 00000000..44173541 --- /dev/null +++ b/Picard/2.27.5/CollectWgsMetrics.nf @@ -0,0 +1,23 @@ +process CollectWgsMetrics { + tag {"PICARD CollectWgsMetrics ${sample_id}"} + label 'PICARD_2_27_5' + label 'PICARD_2_27_5_CollectWgsMetrics' + container = 'quay.io/biocontainers/picard:2.27.5--hdfd78af_0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(bam_file), path(bai_file)) + + output: + path("${sample_id}.wgs_metrics.txt", emit: txt_file) + + script: + """ + picard -Xmx${task.memory.toGiga()-4}G CollectWgsMetrics -Djava.io.tmpdir=\$TMPDIR \ + TMP_DIR=\$TMPDIR \ + R=${params.genome} \ + INPUT=${bam_file} \ + OUTPUT=${sample_id}.wgs_metrics.txt \ + ${params.optional} + """ +} diff --git a/Picard/2.27.5/CreateSequenceDictionary.nf 
b/Picard/2.27.5/CreateSequenceDictionary.nf new file mode 100755 index 00000000..91b624c1 --- /dev/null +++ b/Picard/2.27.5/CreateSequenceDictionary.nf @@ -0,0 +1,20 @@ +process CreateSequenceDictionary { + tag {"PICARD CreateSequenceDictionary"} + label 'PICARD_2_27_5' + label 'PICARD_2_27_5_CreateSequenceDictionary' + container = 'quay.io/biocontainers/picard:2.27.5--hdfd78af_0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + path(genome_fasta) + + output: + path("${genome_fasta.baseName}.dict", emit: genome_dict) + + script: + """ + picard -Xmx${task.memory.toGiga()-4}G -Djava.io.tmpdir=\$TMPDIR CreateSequenceDictionary \ + REFERENCE=${genome_fasta} \ + OUTPUT=${genome_fasta.baseName}.dict + """ +} diff --git a/Picard/2.27.5/EstimateLibraryComplexity.nf b/Picard/2.27.5/EstimateLibraryComplexity.nf new file mode 100755 index 00000000..9b4d30c7 --- /dev/null +++ b/Picard/2.27.5/EstimateLibraryComplexity.nf @@ -0,0 +1,22 @@ +process EstimateLibraryComplexity { + tag {"PICARD EstimateLibraryComplexity ${sample_id}"} + label 'PICARD_2_27_5' + label 'PICARD_2_27_5_EstimateLibraryComplexity' + container = 'quay.io/biocontainers/picard:2.27.5--hdfd78af_0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(bam_file), path(bai_file)) + + output: + path("${sample_id}.LibraryComplexity.txt", emit: txt_file) + + script: + """ + picard -Xmx${task.memory.toGiga()-4}G -Djava.io.tmpdir=\$TMPDIR EstimateLibraryComplexity \ + TMP_DIR=\$TMPDIR \ + INPUT=${bam_file} \ + OUTPUT=${sample_id}.LibraryComplexity.txt \ + ${params.optional} + """ +} diff --git a/Picard/2.27.5/IntervalListTools.nf b/Picard/2.27.5/IntervalListTools.nf new file mode 100755 index 00000000..c3ca50ae --- /dev/null +++ b/Picard/2.27.5/IntervalListTools.nf @@ -0,0 +1,27 @@ +process IntervalListTools { + tag {"PICARD IntervalListTools"} + label 'PICARD_2_27_5' + label 'PICARD_2_27_5_IntervalListTools' + container = 'quay.io/biocontainers/picard:2.27.5--hdfd78af_0' + shell = 
['/bin/bash', '-euo', 'pipefail'] + + input: + path(interval_list) + + output: + path("temp_*/*.interval_list", emit: interval_list) + + script: + """ + picard -Xmx${task.memory.toGiga()-4}G -Djava.io.tmpdir=\$TMPDIR IntervalListTools \ + TMP_DIR=\$TMPDIR \ + INPUT=${interval_list} \ + OUTPUT=. \ + SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \ + SCATTER_COUNT=${params.scatter_count} \ + UNIQUE=true \ + ${params.optional} + + for folder in temp*; do mv \$folder/scattered.interval_list \$folder/\$folder\\.interval_list; done + """ +} diff --git a/Preseq/2.0.3/LCExtrap.nf b/Preseq/2.0.3/LCExtrap.nf index 417b10b0..404df23c 100644 --- a/Preseq/2.0.3/LCExtrap.nf +++ b/Preseq/2.0.3/LCExtrap.nf @@ -1,20 +1,19 @@ - process LCExtrap { - tag {"Preseq LCExtrap ${sample_id} "} - label 'Preseq_2_0_3' - label 'Preseq_2_0_3_LCExtrap' - container = "quay.io/biocontainers/preseq:2.0.3--hf53bd2b_3" - shell = ['/bin/bash', '-euo', 'pipefail'] +process LCExtrap { + tag {"Preseq LCExtrap ${sample_id} "} + label 'Preseq_2_0_3' + label 'Preseq_2_0_3_LCExtrap' + container = "quay.io/biocontainers/preseq:2.0.3--hf53bd2b_3" + shell = ['/bin/bash', '-euo', 'pipefail'] - input: - tuple(val(sample_id), path(bam_file), path(bai_file)) + input: + tuple(val(sample_id), path(bam_file), path(bai_file)) - output: - tuple(val(sample_id), path("${bam_file.baseName}.ccurve.txt") , emit: ccurve_table) - - script: - //Adapted code from: https://github.com/nf-core/rnaseq - MIT License - Copyright (c) Phil Ewels, Rickard Hammarén - """ - preseq lc_extrap ${params.optional} ${bam_file} -o ${bam_file.baseName}.ccurve.txt - """ - } + output: + tuple(val(sample_id), path("${bam_file.baseName}.ccurve.txt") , emit: ccurve_table) + script: + //Adapted code from: https://github.com/nf-core/rnaseq - MIT License - Copyright (c) Phil Ewels, Rickard Hammarén + """ + preseq lc_extrap ${params.optional} ${bam_file} -o ${bam_file.baseName}.ccurve.txt + """ +} diff --git 
a/Preseq/3.2.0/LCExtrap.nf b/Preseq/3.2.0/LCExtrap.nf new file mode 100755 index 00000000..8b4b042b --- /dev/null +++ b/Preseq/3.2.0/LCExtrap.nf @@ -0,0 +1,19 @@ +process LCExtrap { + tag {"Preseq LCExtrap ${sample_id} "} + label 'Preseq_3_2_0' + label 'Preseq_3_2_0_LCExtrap' + container = "quay.io/biocontainers/preseq:3.2.0--h867801b_3" + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(bam_file), path(bai_file)) + + output: + tuple(val(sample_id), path("${bam_file.baseName}.ccurve.txt") , emit: ccurve_table) + + script: + //Adapted code from: https://github.com/nf-core/rnaseq - MIT License - Copyright (c) Phil Ewels, Rickard Hammarén + """ + preseq lc_extrap ${params.optional} ${bam_file} -o ${bam_file.baseName}.ccurve.txt + """ +} diff --git a/RSeQC/5.0.1/RSeQC.nf b/RSeQC/5.0.1/RSeQC.nf new file mode 100755 index 00000000..d355f5c7 --- /dev/null +++ b/RSeQC/5.0.1/RSeQC.nf @@ -0,0 +1,50 @@ +process RSeQC { + tag {"RSeQC ${sample_id}"} + label 'RSeQC_5_0_1' + label 'RSeQC_5_0_1' + container = "quay.io/biocontainers/rseqc:5.0.1--py39hbf8eff0_0" + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(bam_file), path(bai_file)) + path(genome_bed12) + + output: + tuple(val(sample_id), path("*.{txt,pdf,r,xls}"), emit: qc_files) + + script: + //Adapted code from: https://github.com/nf-core/rnaseq - MIT License - Copyright (c) Phil Ewels, Rickard Hammarén + """ + inner_distance.py -i ${bam_file} -o ${bam_file.baseName}.rseqc -r ${genome_bed12} + read_distribution.py -i ${bam_file} -r ${genome_bed12} > ${bam_file.baseName}.read_distribution.txt + infer_experiment.py -i ${bam_file} -r ${genome_bed12} > ${bam_file.baseName}.infer_experiment.txt + junction_annotation.py -i ${bam_file} -o ${bam_file.baseName}.rseqc -r ${genome_bed12} 2> ${bam_file.baseName}.junction_annotation_log.txt + junction_saturation.py -i ${bam_file} -o ${bam_file.baseName}.rseqc -r ${genome_bed12} + read_duplication.py -i ${bam_file} -o 
${bam_file.baseName}.read_duplication + """ +} +//Separated from RSeQC due to memory requirements +process RSeQC_TIN { + tag {"RSeQC ${sample_id}"} + label 'RSeQC_5_0_1' + label 'RSeQC_5_0_1_TIN' + container = "quay.io/biocontainers/rseqc:5.0.1--py39hbf8eff0_0" + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(bam_file), path(bai_file)) + path(genome_bed12) + + output: + tuple(val(sample_id), path("*.{txt,xls}"), emit: tin_stats) + + script: + //Adapted code from: https://github.com/nf-core/rnaseq - MIT License - Copyright (c) Phil Ewels, Rickard Hammarén + """ + tin.py -i ${bam_file} -r ${genome_bed12} + """ +} + + + + diff --git a/STAR/2.7.10b/AlignReads.nf b/STAR/2.7.10b/AlignReads.nf new file mode 100755 index 00000000..429608dc --- /dev/null +++ b/STAR/2.7.10b/AlignReads.nf @@ -0,0 +1,38 @@ +process AlignReads { + tag {"STAR AlignReads ${sample_id} "} + label 'STAR_2_7_10b' + label 'STAR_2_7_10b_AlignReads' + container = 'quay.io/biocontainers/star:2.7.10b--h9ee0642_0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), val(rg_id), path(fastqs)) + path(star_genome_index) + path(genome_gtf) + + output: + tuple(val(sample_id), val(rg_id), path("${sample_id}_Aligned.sortedByCoord.out.bam"), emit: bam_file) + path("*Log.final.out", emit: final_log) + path("*Log.out", emit: log) + path("*SJ.out.tab", emit: sj_table) + path("*Unmapped*", optional: true, emit: fastqs_unaligned) + + script: + def barcode = rg_id.split('_')[1] + def avail_mem = task.memory ? 
"--limitBAMsortRAM ${task.memory.toBytes() - 100000000}" : '' + + """ + STAR --genomeDir ${star_genome_index} \ + ${params.optional} \ + --readFilesIn ${fastqs} \ + --outFileNamePrefix ${sample_id}_ \ + --sjdbGTFfile ${genome_gtf} \ + --runDirPerm All_RWX ${avail_mem} \ + --readFilesCommand zcat \ + --outSAMtype BAM SortedByCoordinate \ + --runThreadN ${task.cpus} \ + --outSAMattrRGline ID:${sample_id} LB:${sample_id} PL:IllUMINA PU:${barcode} SM:${sample_id} + + for f in *_Unmapped.*; do if [ -e "\${f}" ]; then gzip "\${f}"; fi; done + """ +} diff --git a/STAR/2.7.10b/GenomeGenerate.nf b/STAR/2.7.10b/GenomeGenerate.nf new file mode 100755 index 00000000..7c8db208 --- /dev/null +++ b/STAR/2.7.10b/GenomeGenerate.nf @@ -0,0 +1,29 @@ +process GenomeGenerate { + tag {"STAR GenomeGenerate ${genome_fasta.baseName} "} + label 'STAR_2_7_10b' + label 'STAR_2_7_10b_GenomeGenerate' + container = 'quay.io/biocontainers/star:2.7.10b--h9ee0642_0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + path(genome_fasta) + path(genome_gtf) + + output: + path("star_genome", emit: star_path) + + script: + //Adapted code from: https://github.com/nf-core/rnaseq - MIT License - Copyright (c) Phil Ewels, Rickard Hammarén + def avail_mem = task.memory ? 
"--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + + """ + mkdir -p star_genome + STAR \ + --runMode genomeGenerate \ + --runThreadN ${task.cpus} \ + --sjdbGTFfile ${genome_gtf} \ + --genomeDir star_genome/ \ + --genomeFastaFiles ${genome_fasta} \ + $avail_mem + """ +} diff --git a/STAR/2.7.3a/AlignReads.nf b/STAR/2.7.3a/AlignReads.nf index b12a4abf..a02c3466 100644 --- a/STAR/2.7.3a/AlignReads.nf +++ b/STAR/2.7.3a/AlignReads.nf @@ -10,7 +10,6 @@ process AlignReads { path(star_genome_index) path(genome_gtf) - output: tuple(val(sample_id), val(rg_id), path("${sample_id}_Aligned.sortedByCoord.out.bam"), emit: bam_file) path("*Log.final.out", emit: final_log) @@ -18,8 +17,6 @@ process AlignReads { path("*SJ.out.tab", emit: sj_table) path("*Unmapped*", optional: true, emit: fastqs_unaligned) - - script: def barcode = rg_id.split('_')[1] def avail_mem = task.memory ? "--limitBAMsortRAM ${task.memory.toBytes() - 100000000}" : '' @@ -34,5 +31,7 @@ process AlignReads { --outSAMtype BAM SortedByCoordinate \ --runThreadN ${task.cpus} \ --outSAMattrRGline ID:${sample_id} LB:${sample_id} PL:IllUMINA PU:${barcode} SM:${sample_id} + + for f in *_Unmapped.*; do if [ -e "\${f}" ]; then gzip "\${f}"; fi; done """ } diff --git a/STAR/2.7.3a/GenomeGenerate.nf b/STAR/2.7.3a/GenomeGenerate.nf index 4a4be253..fc84efdd 100644 --- a/STAR/2.7.3a/GenomeGenerate.nf +++ b/STAR/2.7.3a/GenomeGenerate.nf @@ -9,21 +9,19 @@ process GenomeGenerate { path(genome_fasta) path(genome_gtf) - output: - path("${genome_fasta.baseName}", emit: star_index) - + path("star_genome", emit: star_path) script: //Adapted code from: https://github.com/nf-core/rnaseq - MIT License - Copyright (c) Phil Ewels, Rickard Hammarén def avail_mem = task.memory ? 
"--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' """ - mkdir ${genome_fasta.baseName} + mkdir -p star_genome STAR \ --runMode genomeGenerate \ --runThreadN ${task.cpus} \ --sjdbGTFfile ${genome_gtf} \ - --genomeDir ${genome_fasta.baseName}/ \ + --genomeDir star_genome/ \ --genomeFastaFiles ${genome_fasta} \ $avail_mem """ diff --git a/STARFusion/1.11.1/STARFusion.nf b/STARFusion/1.11.1/STARFusion.nf new file mode 100755 index 00000000..99b9b7e8 --- /dev/null +++ b/STARFusion/1.11.1/STARFusion.nf @@ -0,0 +1,60 @@ +process STARFusion { + tag {"STARFusion ${sample_id}"} + label 'STARFusion_1_11_1' + container = 'quay.io/biocontainers/star-fusion:1.11.1--hdfd78af_0' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + tuple(val(sample_id), path(fastq_files)) + path(star_index) + path(genome_lib) + + output: + tuple(val(sample_id), path("${sample_id}_star-fusion.tsv"), emit: fusion_predictions) + path("*.{tsv,txt}", emit: fusion_abridged) + + script: + //Adapted code from: https://github.com/nf-core/rnafusion - MIT License - Copyright (c) Martin Proks + def avail_mem = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def read_args = params.single_end ? 
"--left_fq ${fastq_files[0]}" : "--left_fq ${fastq_files[0]} --right_fq ${fastq_files[1]}" + + """ + STAR \ + --genomeDir ${star_index} \ + --readFilesIn ${fastq_files} \ + --twopassMode Basic \ + --outReadsUnmapped None \ + --chimSegmentMin 12 \ + --chimJunctionOverhangMin 12 \ + --alignSJDBoverhangMin 10 \ + --alignMatesGapMax 100000 \ + --alignIntronMax 100000 \ + --chimSegmentReadGapMax 3 \ + --alignSJstitchMismatchNmax 5 -1 5 5 \ + --runThreadN ${task.cpus} \ + --outSAMstrandField intronMotif ${avail_mem} \ + --outSAMunmapped Within \ + --outSAMtype BAM Unsorted \ + --outSAMattrRGline ID:GRPundef \ + --chimMultimapScoreRange 10 \ + --chimMultimapNmax 10 \ + --chimNonchimScoreDropMin 10 \ + --peOverlapNbasesMin 12 \ + --peOverlapMMp 0.1 \ + --readFilesCommand zcat \ + --sjdbOverhang 100 \ + --chimOutJunctionFormat 1 + + STAR-Fusion \ + --genome_lib_dir ${genome_lib} \ + -J Chimeric.out.junction \ + ${read_args} \ + --CPU ${task.cpus} \ + ${params.optional} \ + --output_dir . + + mv star-fusion.fusion_predictions.tsv ${sample_id}_star-fusion.tsv + mv star-fusion.fusion_predictions.abridged.tsv ${sample_id}_abridged.tsv + mv star-fusion.fusion_predictions.abridged.coding_effect.tsv ${sample_id}_abridged.coding_effect.tsv + """ +} diff --git a/Salmon/1.9.0/Index.nf b/Salmon/1.9.0/Index.nf new file mode 100755 index 00000000..6392fe1d --- /dev/null +++ b/Salmon/1.9.0/Index.nf @@ -0,0 +1,20 @@ +process Index { + tag {"Salmon Index ${transcripts_fasta.baseName}"} + label 'Salmon_1_9_0' + label 'Salmon_1_9_0_Index' + container = 'quay.io/biocontainers/salmon:1.9.0--h7e5ed60_1' + shell = ['/bin/bash', '-euo', 'pipefail'] + + input: + path(transcripts_fasta) + + output: + path("${transcripts_fasta.baseName}/", emit: salmon_index) + + script: + def gencode = params.gencode ? 
process Quant {
    // Quantifies one sample with `salmon quant` against a prebuilt index and
    // emits the per-sample output directory (named after the sample id).
    tag {"Salmon Quant ${sample_id}"}
    label 'Salmon_1_9_0'
    label 'Salmon_1_9_0_Quant'
    container = 'quay.io/biocontainers/salmon:1.9.0--h7e5ed60_1'
    shell = ['/bin/bash', '-euo', 'pipefail']

    input:
        tuple(val(sample_id), path(fastq_files))
        path(salmon_index)

    output:
        tuple(val(sample_id), path("${sample_id}/"), emit: quant_table)

    script:
        //Adapted code from: https://github.com/nf-core/rnaseq - MIT License - Copyright (c) Phil Ewels, Rickard Hammarén
        // Strand part of the library type: U unless a stranded protocol is
        // selected; params.unstranded overrides both stranded flags.
        def strand_code = 'U'
        if (!params.unstranded) {
            if (params.stranded) {
                strand_code = 'SF'
            } else if (params.revstranded) {
                strand_code = 'SR'
            }
        }
        // Paired-end library types are the single-end code prefixed with "I".
        def lib_type = params.single_end ? strand_code : "I${strand_code}"

        // Read arguments: -r for single-end, -1/-2 for paired-end.
        def read_args = "-1 ${fastq_files[0]} -2 ${fastq_files[1]}"
        if (params.single_end) {
            read_args = "-r ${fastq_files[0]}"
        }
        def unmapped_flag = params.saveUnaligned ? "--writeUnmappedNames" : ''

        """
        salmon quant --validateMappings \
            ${params.optional} \
            --threads ${task.cpus} \
            --libType=${lib_type} \
            --index ${salmon_index} \
            ${read_args} ${unmapped_flag} \
            -o ${sample_id}
        """
}
process Markdup {
    // Runs `sambamba markdup` on one BAM and emits <sample_id>.markdup.bam
    // together with its .bai.
    // NOTE(review): the script never calls `sambamba index`; the declared
    // .bai output presumably relies on markdup writing the index itself - confirm.
    tag {"Sambamba Markdup ${sample_id}"}
    label 'Sambamba_0_8_2'
    label 'Sambamba_0_8_2_Markdup'
    container = 'quay.io/biocontainers/sambamba:0.8.2--h98b6b92_2'
    shell = ['/bin/bash', '-euo', 'pipefail']

    input:
        tuple(val(sample_id), val(rg_id), path(bam_file), path(bai_file))

    output:
        tuple(val(sample_id), val(rg_id), path("${sample_id}.markdup.bam"), path("${sample_id}.markdup.bam.bai"), emit: bam_file)

    script:
        // Same file name as declared in the output block.
        def marked_bam = "${sample_id}.markdup.bam"
        """
        sambamba markdup -t ${task.cpus} ${bam_file} ${marked_bam}
        """
}
process MergeBams {
    // Merges all BAMs of a sample into <sample_id>_<ext> and indexes the result.
    tag {"Sambamba MergeBams ${sample_id}"}
    label 'Sambamba_0_8_2'
    label 'Sambamba_0_8_2_MergeBams'
    container = 'quay.io/biocontainers/sambamba:0.8.2--h98b6b92_2'
    shell = ['/bin/bash', '-euo', 'pipefail']

    input:
        tuple(val(sample_id), path(bams), path(bais))

    output:
        tuple(val(sample_id), path("${sample_id}_${ext}"), path("${sample_id}_${ext}.bai"), emit: merged_bams)

    script:
        // `ext` is everything after the last "_" in the resolved path of the first BAM.
        // It is deliberately left without `def`: the output block above reads it.
        ext = bams[0].toRealPath().toString().split("_")[-1]
        def merged_bam = "${sample_id}_${ext}"

        """
        sambamba merge -t ${task.cpus} ${merged_bam} ${bams}
        sambamba index -t ${task.cpus} ${merged_bam} ${merged_bam}.bai
        """
}
process ViewUnmapped {
    // Extracts the reads matching the sambamba filter
    // 'unmapped and mate_is_unmapped' into a new BAM and indexes it.
    tag {"Sambamba ViewUnmapped ${sample_id}"}
    label 'Sambamba_0_8_2'
    label 'Sambamba_0_8_2_ViewUnmapped'
    container = 'quay.io/biocontainers/sambamba:0.8.2--h98b6b92_2'
    shell = ['/bin/bash', '-euo', 'pipefail']

    input:
        tuple(val(sample_id), path(bam_file), path(bai_file))

    output:
        tuple(val(sample_id), path("${bam_file.baseName}.unmapped.bam"), path("${bam_file.baseName}.unmapped.bam.bai"), emit: bam_file)

    script:
        // Same file name as declared in the output block.
        def unmapped_bam = "${bam_file.baseName}.unmapped.bam"
        """
        sambamba view -t ${task.cpus} -f bam -F 'unmapped and mate_is_unmapped' ${bam_file} > ${unmapped_bam}
        sambamba index -t ${task.cpus} ${unmapped_bam}
        """
}
process Flagstat {
    // Writes the `samtools flagstat` report of a BAM to <bam basename>.flagstat.
    tag {"Samtools Flagstat ${sample_id}"}
    label 'Samtools_1_16_1'
    label 'Samtools_1_16_1_Flagstat'
    container = 'quay.io/biocontainers/samtools:1.16.1--h6899075_1'
    shell = ['/bin/bash', '-euo', 'pipefail']

    input:
        tuple(val(sample_id), path(bam_file), path(bai_file))

    output:
        path("${bam_file.baseName}.flagstat", emit: flagstat)

    script:
        // Same file name as declared in the output block.
        def report = "${bam_file.baseName}.flagstat"
        """
        samtools flagstat ${bam_file} > ${report}
        """
}
process MPileup_bcf {
    // Variant of MPileup that passes -u to `samtools mpileup` and stores stdout
    // as <bam basename>.bcf. The reference comes from params.genome.
    // NOTE(review): BCF output via -u belongs to older samtools releases;
    // verify that the 1.16.1 container still accepts this option.
    tag {"Samtools MPileup_bcf ${sample_id}"}
    label 'Samtools_1_16_1'
    label 'Samtools_1_16_1_MPileup_bcf'
    container = 'quay.io/biocontainers/samtools:1.16.1--h6899075_1'
    shell = ['/bin/bash', '-euo', 'pipefail']

    input:
        tuple(val(sample_id), path(bam_file), path(bai_file))

    output:
        tuple(val(sample_id), path("${bam_file.baseName}.bcf"), emit: bcf)

    script:
        // Same file name as declared in the output block.
        def bcf_name = "${bam_file.baseName}.bcf"
        """
        samtools mpileup ${params.optional} -u -f ${params.genome} ${bam_file} > ${bcf_name}
        """
}
process View {
    // Dumps a BAM (optionally restricted to params.region) to <bam basename>.sam
    // using `samtools view`; extra flags come from params.optional.
    tag {"Samtools View ${sample_id}"}
    label 'Samtools_1_16_1'
    label 'Samtools_1_16_1_View'
    container = 'quay.io/biocontainers/samtools:1.16.1--h6899075_1'
    shell = ['/bin/bash', '-euo', 'pipefail']

    input:
        tuple(val(sample_id), path(bam_file), path(bai_file))

    output:
        tuple(val(sample_id), path("${bam_file.baseName}.sam"), emit: sam_file)

    script:
        // Same file name as declared in the output block.
        def sam_name = "${bam_file.baseName}.sam"
        """
        samtools view ${params.optional} ${bam_file} ${params.region} > ${sam_name}
        """
}
process SortMeRNA {
    // Splits reads into rRNA and non-rRNA sets with sortmerna and renames the
    // outputs after the input FASTQ names. Single-end and paired-end data use
    // different command lines, selected by params.single_end.
    tag {"SortMeRNA ${sample_id}"}
    label 'SortMeRNA_4_3_6'
    container = 'quay.io/biocontainers/sortmerna:4.3.6--h9ee0642_0'
    shell = ['/bin/bash', '-euo', 'pipefail']

    input:
        tuple(val(sample_id), val(rg_id), path(fastq_files))
        path(db_fasta)

    output:
        tuple(val(sample_id), val(rg_id), path("*_non_rRNA.fastq.gz"), emit: non_rRNA_fastqs)
        path("*_filtered_rRNA.fastq.gz", emit: rRNA_fastqs)
        path("*_rRNA_report.txt", emit: qc_report)

    script:
        // Every database file becomes its own -ref argument (the first -ref is in the command).
        def ref_args = db_fasta.collect{ "$it" }.join(" -ref ")
        def fwd_name = fastq_files[0].simpleName
        // Report name: forward file name up to the "_R1_" marker.
        def report_prefix = fwd_name.split("_R1_")[0]
        if (!params.single_end) {
            def rev_name = fastq_files[1].simpleName
            """
            sortmerna -ref ${ref_args} \
                -reads ${fastq_files[0]} -reads ${fastq_files[1]} \
                --num_alignments 1 \
                --threads ${task.cpus} \
                -workdir \${PWD} \
                --fastx -paired_in \
                --aligned rRNA-reads \
                --other non-rRNA-reads \
                -out2 \
                --zip-out

            mv non-rRNA-reads_fwd.fq.gz ${fwd_name}_non_rRNA.fastq.gz
            mv non-rRNA-reads_rev.fq.gz ${rev_name}_non_rRNA.fastq.gz
            mv rRNA-reads_fwd.fq.gz ${fwd_name}_filtered_rRNA.fastq.gz
            mv rRNA-reads_rev.fq.gz ${rev_name}_filtered_rRNA.fastq.gz
            mv rRNA-reads.log ${report_prefix}_rRNA_report.txt
            """
        } else {
            """
            sortmerna -ref ${ref_args} \
                -reads ${fastq_files} \
                --num_alignments 1 \
                --threads ${task.cpus} \
                --fastx \
                -workdir \${PWD} \
                --aligned rRNA-reads \
                --other non-rRNA-reads \
                --zip-out

            mv non-rRNA-reads.fq.gz ${fwd_name}_non_rRNA.fastq.gz
            mv rRNA-reads.fq.gz ${fwd_name}_filtered_rRNA.fastq.gz
            mv rRNA-reads.log ${report_prefix}_rRNA_report.txt
            """
        }
}
process FeatureCounts {
    // Counts reads per feature over all BAMs of a run with featureCounts and
    // strips the header line plus the "_Aligned.sortedByCoord.out.bam" suffix
    // from the table. When params.biotypeQC is set, a second featureCounts run
    // grouped by biotype produces an additional two-column matrix.
    tag {"Subread FeatureCounts ${run_id}"}
    label 'Subread_2_0_3'
    label 'Subread_2_0_3_FeatureCounts'
    container = 'quay.io/biocontainers/subread:2.0.3--h7132678_0'
    shell = ['/bin/bash', '-euo', 'pipefail']

    input:
        val(run_id)
        path(bam_file)
        path(genome_gtf)

    output:
        path("${run_id}_${params.fc_count_type}_featureCounts.raw.txt", emit: count_table)
        path("${run_id}_${params.fc_count_type}_featureCounts.txt.summary", emit: count_summary)
        path("${run_id}_biotype_featureCounts.matrix.txt", emit: biotype_count_table, optional: true)
        path("${run_id}_biotype_featureCounts.txt.summary", emit: biotype_count_summary, optional: true)

    script:
        //Adapted code from: https://github.com/nf-core/rnaseq - MIT License - Copyright (c) Phil Ewels, Rickard Hammarén
        def bams_arg = bam_file.collect{ "$it" }.join(" ")
        def biotype_attr = params.gencode ? "gene_type" : params.fc_group_features_type
        def extra_attr_arg = params.fc_extra_attributes ? "--extraAttributes ${params.fc_extra_attributes}" : ''
        def paired_flag = params.single_end ? '' : "-p"
        // Value for -s: 0 by default, 1 for stranded, 2 for reverse-stranded;
        // params.unstranded forces 0.
        def strand_flag = params.unstranded ? 0 : (params.stranded ? 1 : (params.revstranded ? 2 : 0))
        // Optional biotype QC: both commands stay empty unless params.biotypeQC is set.
        def biotype_count_cmd = ''
        def biotype_matrix_cmd = ''
        if (params.biotypeQC) {
            biotype_count_cmd = "featureCounts -a ${genome_gtf} -g ${biotype_attr} -o ${run_id}_biotype_featureCounts.txt -s ${strand_flag} ${paired_flag} ${params.optional} ${bam_file}"
            biotype_matrix_cmd = "cut -f 1,7 ${run_id}_biotype_featureCounts.txt | tail -n +2 | sed 's/\\_Aligned.sortedByCoord.out.bam\\>//g' > ${run_id}_biotype_featureCounts.matrix.txt"
        }
        def counts_txt = "${run_id}_${params.fc_count_type}_featureCounts.txt"
        """
        featureCounts -T ${task.cpus} -a ${genome_gtf} -t ${params.fc_count_type} -g ${params.fc_group_features} -o ${counts_txt} -s ${strand_flag} ${paired_flag} ${params.optional} ${extra_attr_arg} ${bams_arg}
        tail -n +2 ${counts_txt} | sed 's/\\_Aligned.sortedByCoord.out.bam\\>//g' > "${run_id}_${params.fc_count_type}_featureCounts.raw.txt"
        ${biotype_count_cmd}
        ${biotype_matrix_cmd}
        """
}
process Tabix_Tabix {
    // Runs `tabix -p vcf` on the input file and emits the file together with
    // the .tbi index that tabix writes next to it.
    tag {"Tabix Tabix ${vcf.name}"}
    label 'Tabix_1_11'
    label 'Tabix_1_11_Tabix'
    container = 'quay.io/biocontainers/tabix:1.11--hdfd78af_0'
    shell = ['/bin/bash', '-euo', 'pipefail']

    input:
        path(vcf)

    output:
        // The first element used to read "${vcf).name}" - a misplaced ")" that
        // kept the script from parsing. It now mirrors the .tbi element.
        tuple(path("${vcf.name}"), path("${vcf.name}.tbi"), emit: vcf)

    script:
        """
        tabix -p vcf ${vcf.name}
        """
}
process CreateBed {
    // Turns a tab-separated index file into a BED file: for every input line
    // it prints column 1, a literal "0" and column 2.
    tag {"CreateBed ${genome_index.baseName}"}
    label 'CreateBed'
    shell = ['/bin/bash', '-euo', 'pipefail']

    input:
        path(genome_index)

    output:
        path("${genome_index.baseName}.bed", emit: genome_bed)

    script:
        // Same file name as declared in the output block.
        def bed_name = "${genome_index.baseName}.bed"
        """
        awk -v FS='\t' -v OFS='\t' '{ print \$1, "0", \$2 }' ${genome_index} > ${bed_name}
        """
}
/*
 * Builds a channel of [sample_id, chunk, [fast5_path]] for every *.fast5 file
 * found directly inside the given directories.
 *
 * dir: whitespace-separated list of directories.
 * sample_id is the 1st and chunk the 5th "_"-separated field of the file's
 * simple name. Fails when no fast5 file is found or when a file name has
 * fewer than five fields.
 */
def extractAllFast5FromDir(dir) {
    // Original code from: https://github.com/SciLifeLab/Sarek - MIT License - Copyright (c) 2016 SciLifeLab
    dir = dir.tokenize().collect{"$it/*.fast5"}
    Channel
        .fromPath(dir, type:'file')
        .ifEmpty { error "No fast5 files found in ${dir}." }
        .map { fast5_path ->
            // Everything is declared with `def`: undeclared variables inside a
            // closure end up in the shared script binding and can be
            // overwritten by another item being mapped at the same time.
            def name_fields = fast5_path.getSimpleName().split('_')
            if (name_fields.size() < 5) {
                error "Cannot derive sample id and chunk from fast5 file name ${fast5_path}."
            }
            def sample_id = name_fields[0]
            def chunk = name_fields[4]
            def fast5_files = [fast5_path]

            [sample_id, chunk, fast5_files]
        }
}
/*
 * Builds a channel of [id, vcf, index] for every .vcf / .vcf.gz file found
 * (recursively) below `dir`.
 *
 * id is the file's simple name. The index is looked up next to the file:
 * <file>.vcf.idx for plain VCFs and <file>.vcf.gz.tbi for compressed ones.
 * Fails when no VCF is found or when a VCF has no index.
 */
def extractVCFFromDir(dir){
    Channel
        .fromPath(["${dir}/**.vcf", "${dir}/**.vcf.gz"], type:'file')
        .ifEmpty { error "No .vcf or .vcf.gz files found in ${dir}!" }
        .map{ vcf ->
            def id = vcf.getSimpleName()
            def vcf_name = vcf.toString()
            // Pick the index by file suffix. The previous replace()-based lookup
            // fell back to the VCF itself for a plain .vcf without an .idx
            // (replacing '.vcf.gz' is a no-op there), so the missing index went
            // unnoticed; it also rewrote '.vcf' anywhere in the path.
            def idx_path = vcf_name.endsWith('.vcf.gz') ? file("${vcf_name}.tbi") : file("${vcf_name}.idx")
            if (! idx_path.exists()){
                error "No index file (vcf.idx or vcf.gz.tbi) found for ${vcf}!"
            }
            [id, vcf, idx_path]
        }
}
process SNPEffFilter {
    // Runs snpEff on a VCF for genome params.snpeff_genome and writes stdout to
    // <vcf simple name>.filtered_variants.vcf. The snpEff data directory comes
    // from params.snpeff_datadir, falling back to the job's \$TMPDIR.
    tag {"SNPEff SNPEffFilter ${run_id}"}
    label 'SNPEff_5_1d'
    label 'SNPEff_5_1d_SNPEffFilter'
    clusterOptions = workflow.profile == "sge" ? "-l h_vmem=${params.mem}" : ""
    container = 'quay.io/biocontainers/snpeff:5.1d--hdfd78af_0'
    shell = ['/bin/bash', '-euo', 'pipefail']

    input:
        tuple(val(run_id), path(vcf), path(vcfidx))

    output:
        tuple(val(run_id), path("${vcf.simpleName}.filtered_variants.vcf"), emit: snpeff_filtered)

    script:
        // Unset, empty and the literal string "null" all mean "no data dir configured".
        // (The leftover debug print of params.snpeff_datadir was dropped.)
        def datadir = (!params.snpeff_datadir || params.snpeff_datadir == "null") ? "\$TMPDIR" : params.snpeff_datadir
        // Leave 4 GB headroom for the JVM, but never ask for less than 1 GB:
        // with 4 GB or less of task memory the plain subtraction gave -Xmx0g or a negative size.
        def heap_gb = Math.max(1, task.memory.toGiga() - 4)
        """
        snpEff -Xmx${heap_gb}g -Djava.io.tmpdir=\$TMPDIR \
            -c snpEff.config \
            ${params.snpeff_genome} \
            ${params.optional} \
            -dataDir ${datadir} \
            -v $vcf \
            > ${vcf.simpleName}.filtered_variants.vcf
        """
}