version 1.0
##Copyright Broad Institute, 2018
##
## This WDL converts paired FASTQ to uBAM and adds read group information
##
## Requirements/expectations :
## - Paired-end sequencing data in FASTQ format (one file per orientation)
## - The following metadata descriptors per sample:
##  - readgroup
##  - sample_name
##  - library_name
##  - platform_unit
##  - run_date
##  - platform_name
##  - sequencing_center
##
## Outputs :
## - Set of unmapped BAMs, one per read group
## - File of a list of the generated unmapped BAMs
## For program versions, see docker containers.
##
## LICENSING :
## This script is released under the WDL source code license (BSD-3) (see LICENSE in
## https://github.com/broadinstitute/wdl). Note however that the programs it calls may
## be subject to different licenses. Users are responsible for checking that they are
## authorized to run all programs before running this script. Please see the docker
## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed
## licensing information pertaining to the included programs.

# WORKFLOW DEFINITION
workflow ConvertPairedFastQsToUnmappedBamWf {
    # Single-call workflow: forwards one FASTQ pair plus its read-group
    # descriptors to PairedFastQsToUnmappedBAM and exposes the resulting
    # unmapped BAM as the workflow output.
    input {
        # Value written to the sample field of the read group
        String sample_name
        # The two FASTQ files of the pair (one per read orientation)
        File fastq_1
        File fastq_2
        # Read group name; the task also uses it as the output file prefix
        String readgroup_name
        # Sequencing platform passed through to the conversion task
        String platform
        # Registry prefix prepended to the GATK image path below
        String ecr_registry
    }

    # Container image and in-container launcher used by the conversion task
    String gatk_docker = "~{ecr_registry}/ecr-public/aws-genomics/broadinstitute/gatk:4.2.6.1-corretto-11"
    String gatk_path = "/gatk/gatk"

    # FASTQ pair -> unmapped BAM
    call PairedFastQsToUnmappedBAM {
        input:
            docker = gatk_docker,
            gatk_path = gatk_path,
            fastq_1 = fastq_1,
            fastq_2 = fastq_2,
            sample_name = sample_name,
            readgroup_name = readgroup_name,
            platform = platform
    }

    # Files kept once the run has finished
    output {
        File output_unmapped_bam = PairedFastQsToUnmappedBAM.output_unmapped_bam
    }
}

# TASK DEFINITIONS

# Convert a pair of FASTQs to uBAM
task PairedFastQsToUnmappedBAM {
    # Runs GATK FastqToSam on one pair of FASTQ files and produces a single
    # unmapped BAM named "<readgroup_name>.unmapped.bam" in the task's
    # working directory.
    #
    # Inputs:
    #   sample_name     value passed to --SAMPLE_NAME
    #   fastq_1         first FASTQ of the pair (--FASTQ)
    #   fastq_2         second FASTQ of the pair (--FASTQ2)
    #   readgroup_name  value passed to --READ_GROUP_NAME; also the output prefix
    #   platform        value passed to --PLATFORM (e.g. illumina, solid)
    #   gatk_path       path of the gatk launcher inside the container
    #   machine_mem_gb  memory requested for the task, in GiB (default 7)
    #   docker          container image the task runs in
    #
    # Output:
    #   output_unmapped_bam  the BAM written by FastqToSam
    input {
        # Command parameters
        String sample_name
        File fastq_1
        File fastq_2
        String readgroup_name
        # The platform type (e.g. illumina, solid)
        String platform
        String gatk_path

        # Runtime parameters
        Int machine_mem_gb = 7
        String docker
    }
    # The Java heap limit (-Xmx) is set 1 GB below the memory requested for
    # the machine; presumably headroom for non-heap usage.
    Int command_mem_gb = machine_mem_gb - 1
    # Shell snippet that prints the free space of the working directory's filesystem
    String disk_usage_cmd = "echo storage remaining: $(df -Ph . | awk 'NR==2 {print $4}')"

    command {
        # Stop at the first failing command. Without this the exit status of
        # the script is that of the trailing disk-usage echo, which would
        # hide a FastqToSam failure.
        set -e

        # determine scratch size used
        ~{disk_usage_cmd}

        echo "FASTQ to uBAM" >&2
        echo "fastq_1 ~{fastq_1}" >&2
        echo "fastq_2 ~{fastq_2}" >&2
        echo "sample_name ~{sample_name}" >&2
        echo "readgroup_name ~{readgroup_name}" >&2
        echo "platform ~{platform}" >&2

        # Arguments are quoted so values containing whitespace are passed intact
        ~{gatk_path} --java-options "-Dsamjdk.compression_level=2 -Xmx~{command_mem_gb}g" \
        FastqToSam \
        --FASTQ "~{fastq_1}" \
        --FASTQ2 "~{fastq_2}" \
        --OUTPUT "~{readgroup_name}.unmapped.bam" \
        --READ_GROUP_NAME "~{readgroup_name}" \
        --PLATFORM "~{platform}" \
        --SAMPLE_NAME "~{sample_name}"

        # determine final scratch size used
        ~{disk_usage_cmd}
    }
    runtime {
        docker: docker
        memory: machine_mem_gb + " GiB"
        cpu: 2
    }
    output {
        File output_unmapped_bam = "~{readgroup_name}.unmapped.bam"
    }
}