version 1.0
## Copyright Broad Institute, 2018
##
## This WDL converts paired FASTQ to uBAM and adds read group information
##
## Requirements/expectations :
## - Paired-end sequencing data in FASTQ format (one file per orientation)
## - The following metadata descriptors per sample:
##   - readgroup
##   - sample_name
##   - library_name
##   - platform_unit
##   - run_date
##   - platform_name
##   - sequencing_center
##
## Outputs :
## - Set of unmapped BAMs, one per read group
## - File of a list of the generated unmapped BAMs
##
## Cromwell version support
## - Successfully tested on v47
## - Does not work on versions < v23 due to output syntax
##
## Runtime parameters are optimized for Broad's Google Cloud Platform implementation.
## For program versions, see docker containers.
##
## LICENSING :
## This script is released under the WDL source code license (BSD-3) (see LICENSE in
## https://github.com/broadinstitute/wdl). Note however that the programs it calls may
## be subject to different licenses. Users are responsible for checking that they are
## authorized to run all programs before running this script. Please see the docker
## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed
## licensing information pertaining to the included programs.
# WORKFLOW DEFINITION
# Converts one pair of FASTQ files into a single unmapped BAM (uBAM) that
# carries the supplied read group metadata. All work is delegated to the
# PairedFastQsToUnmappedBAM task below.
#
# Inputs:
#   sample_name        - passed to FastqToSam as --SAMPLE_NAME
#   fastq_1, fastq_2   - the two FASTQ files of the pair (--FASTQ / --FASTQ2)
#   readgroup_name     - passed as --READ_GROUP_NAME; also the output file stem
#   run_date           - passed as --RUN_DATE
#   library_name       - passed as --LIBRARY_NAME
#   platform_name      - passed as --PLATFORM
#   sequencing_center  - passed as --SEQUENCING_CENTER
#   gatk_docker        - container image the task runs in
#
# Output:
#   output_unmapped_bam - the uBAM written by the task
workflow ConvertPairedFastQsToUnmappedBamWf {
  input {
    String sample_name
    File fastq_1
    File fastq_2
    String readgroup_name
    String run_date
    String library_name
    String platform_name
    String sequencing_center
    String gatk_docker
  }

  #String gatk_docker = "022521056385.dkr.ecr.us-east-1.amazonaws.com/gatk:4.1.9.0"

  # Location of the GATK launcher handed to the task; fixed here rather than
  # exposed as a workflow input.
  String gatk_path = "/gatk/gatk"

  # Convert pair of FASTQs to uBAM
  call PairedFastQsToUnmappedBAM {
    input:
      sample_name = sample_name,
      fastq_1 = fastq_1,
      fastq_2 = fastq_2,
      readgroup_name = readgroup_name,
      run_date = run_date,
      library_name = library_name,
      platform_name = platform_name,
      sequencing_center = sequencing_center,
      gatk_path = gatk_path,
      docker = gatk_docker
  }

  # Outputs that will be retained when execution is complete
  output {
    File output_unmapped_bam = PairedFastQsToUnmappedBAM.output_unmapped_bam
  }
}

# TASK DEFINITIONS

# Convert a pair of FASTQs to uBAM.
#
# Runs `<gatk_path> FastqToSam` on the two FASTQ files and writes
# `<readgroup_name>.unmapped.bam` in the task's working directory.
task PairedFastQsToUnmappedBAM {
  input {
    # Command parameters
    String sample_name
    File fastq_1
    File fastq_2
    String readgroup_name
    String gatk_path
    String run_date
    String library_name
    String platform_name
    String sequencing_center

    # Runtime parameters
    Int machine_mem_gb = 7
    String docker
  }

  # The Java heap is capped 1 GB below the memory requested for the machine so
  # the JVM's -Xmx never equals the full container allocation.
  Int command_mem_gb = machine_mem_gb - 1

  # Every interpolated value is double-quoted so that file paths or metadata
  # containing spaces or glob characters reach FastqToSam as a single argument.
  command <<<
    echo "FASTQ to uBAM" >&2
    echo "fastq_1 ~{fastq_1}" >&2
    echo "fastq_2 ~{fastq_2}" >&2
    echo "sample_name ~{sample_name}" >&2
    echo "readgroup_name ~{readgroup_name}" >&2

    "~{gatk_path}" --java-options "-Xmx~{command_mem_gb}g" \
      FastqToSam \
      --FASTQ "~{fastq_1}" \
      --FASTQ2 "~{fastq_2}" \
      --OUTPUT "~{readgroup_name}.unmapped.bam" \
      --READ_GROUP_NAME "~{readgroup_name}" \
      --SAMPLE_NAME "~{sample_name}" \
      --LIBRARY_NAME "~{library_name}" \
      --RUN_DATE "~{run_date}" \
      --PLATFORM "~{platform_name}" \
      --SEQUENCING_CENTER "~{sequencing_center}"

    # Only the uBAM is produced; this task does not write a file-of-file-names.
  >>>

  runtime {
    docker: docker
    memory: "~{machine_mem_gb} GiB"
    cpu: 4
  }

  output {
    File output_unmapped_bam = "~{readgroup_name}.unmapped.bam"
  }
}