diff --git a/Dockstore.json b/Dockstore.json new file mode 100644 index 0000000..6b45ccd --- /dev/null +++ b/Dockstore.json @@ -0,0 +1,18 @@ +{ + "reference": { + "path": "http://hgwdev.cse.ucsc.edu/~jeltje/public_data/genome.fa.gz", + "class": "File" + }, + "normal": { + "path": "https://dcc.icgc.org/api/v1/download?fn=/PCAWG/reference_data/data_for_testing/HCC1143_ds/HCC1143_BL.bam", + "class": "File" + }, + "tumor": { + "path": "https://dcc.icgc.org/api/v1/download?fn=/PCAWG/reference_data/data_for_testing/HCC1143_ds/HCC1143.bam", + "class": "File" + }, + "mutations": { + "path": "/tmp/mutect.vcf", + "class": "File" + } +} diff --git a/mutect.cwl.yaml b/mutect.cwl similarity index 62% rename from mutect.cwl.yaml rename to mutect.cwl index 86ffdd7..4d6ef46 100644 --- a/mutect.cwl.yaml +++ b/mutect.cwl @@ -1,79 +1,81 @@ cwlVersion: v1.0 class: CommandLineTool -label: MuTect + +doc: "Mutect 1.1.5" + +hints: + DockerRequirement: + dockerPull: quay.io/opengenomics/mutect + baseCommand: ['python', '/opt/mutect.py'] -requirements: - - class: "DockerRequirement" - dockerImageId: "mutect:1.1.5" + inputs: - - id: "#tumor" + tumor: type: File inputBinding: prefix: --input_file:tumor - secondaryFiles: - - .bai - - id: "#normal" + normal: type: File inputBinding: prefix: --input_file:normal - secondaryFiles: - - .bai - - id: "#reference" + reference: type: File inputBinding: prefix: --reference_sequence - secondaryFiles: - - .fai - - ^.dict - - id: "#cosmic" - type: File + cosmic: + type: File? inputBinding: prefix: --cosmic - - id: "#dbsnp" - type: File + dbsnp: + type: File? inputBinding: prefix: --dbsnp secondaryFiles: .tbi - - id: "#tumor_lod" - type: float + tumor_lod: + type: float? default: 6.3 inputBinding: prefix: --tumor_lod - - id: "#initial_tumor_lod" - type: float + initial_tumor_lod: + type: float? default: 4.0 inputBinding: prefix: --initial_tumor_lod - - id: "#out" - type: string + ncpus: + type: int? 
def gunzip(infile, outfile):
    """Decompress the gzip file *infile* into *outfile* by running ``zcat``.

    Args:
        infile: Path of the gzip-compressed input file.
        outfile: Path the decompressed bytes are written to; it is created
            or truncated.

    Raises:
        Exception: If ``zcat`` exits with a non-zero status.  Whatever
            ``zcat`` wrote to stderr is printed before raising.
    """
    # Use an argument vector rather than a shell string: paths containing
    # spaces or shell metacharacters are then handed to zcat verbatim
    # instead of being split or interpreted by /bin/sh.
    cmd = ['zcat', infile]
    # Binary mode: the child writes straight to the file descriptor, so no
    # text translation should ever be involved.
    with open(outfile, 'wb') as outF:
        p = subprocess.Popen(cmd, stdout=outF, stderr=subprocess.PIPE)
        _, stderr = p.communicate()
    # Judge success by exit status, not by whether stderr is empty: a zcat
    # that is killed without printing anything must still count as failure,
    # otherwise a truncated output file would be used silently.
    if p.returncode != 0:
        if not isinstance(stderr, str):
            # Python 3 returns bytes from the pipe; Python 2 returns str.
            stderr = stderr.decode('utf-8', 'replace')
        # Single parenthesised argument prints identically on Python 2 and 3.
        print("unzip command failed: %s" % stderr)
        raise Exception("unzip failed")