#!/usr/bin/env nextflow

nextflow.enable.dsl=2

// Define parameters with default values that can be overridden
params.ref_genome = "/home/gpsr/webserver/cgidocs/anshu/RAPID-engine/reference_genome/acinetobacter_genome.fa"
params.reads_dir = "/home/gpsr/webserver/cgidocs/anshu/RAPID-engine/upload"
params.output_dir = "/home/gpsr/webserver/cgidocs/anshu/RAPID-engine/result"
params.threads = 8

// Log parameters for clarity
log.info """
    A C I N E T O B A C T E R   A N A L Y S I S   P I P E L I N E
    ===================================================
    Reference Genome: ${params.ref_genome}
    Reads Directory : ${params.reads_dir}
    Output Directory: ${params.output_dir}
    Threads         : ${params.threads}
    """

// Workflow definition
workflow {
    // Create input channel for read files
    read_pairs = Channel
        .fromPath("${params.reads_dir}/*.{fastq,fastq.gz,fasta}") // Accepts .fastq, .fastq.gz, .fasta
        .map { file -> 
            def sample_name = file.baseName.replaceFirst(/\.(fastq|fasta|gz)$/, '') // Remove extension
            [sample_name, file] 
        }

    // Run processes in sequence
    INDEX_REFERENCE(params.ref_genome)
    ALIGN_READS(read_pairs, params.ref_genome, INDEX_REFERENCE.out)
    CALL_VARIANTS(params.ref_genome, ALIGN_READS.out.map { it[1] }.collect())
    FILTER_VARIANTS(CALL_VARIANTS.out)
    ANALYZE_RESULTS(FILTER_VARIANTS.out[1])
}

// Process 1: Index Reference Genome
process INDEX_REFERENCE {
    container 'singularity://staphb/bwa:0.7.17'
    
    input:
    path ref_genome

    output:
    path "${ref_genome}*"

    script:
    """
    bwa index ${ref_genome}
    samtools faidx ${ref_genome}
    """
}

// Process 2: Align Reads to Reference
process ALIGN_READS {
    container 'singularity://staphb/bwa:0.7.17'
    publishDir params.output_dir, mode: 'copy'

    input:
    tuple val(sample_name), path(reads)
    path ref_genome
    path bwa_index

    output:
    tuple val(sample_name), path("${sample_name}.bam"), path("${sample_name}.bam.bai")

    script:
    """
    bwa mem -t ${params.threads} ${ref_genome} ${reads} | \
    samtools view -bS - | \
    samtools sort -o ${sample_name}.bam
    samtools index ${sample_name}.bam
    """
}

// Process 3: Call Variants
process CALL_VARIANTS {
    container 'singularity://staphb/freebayes:1.3.5'
    publishDir params.output_dir, mode: 'copy'

    input:
    path ref_genome
    path bams

    output:
    path "variants.vcf"

    script:
    """
    freebayes -f ${ref_genome} ${bams} > variants.vcf
    """
}

// Process 4: Filter Variants
process FILTER_VARIANTS {
    container 'singularity://quay.io/biocontainers/vcftools:0.1.16--he513fc3_1'
    publishDir params.output_dir, mode: 'copy'

    input:
    path raw_vcf

    output:
    path "filtered_variants.recode.vcf"
    path "allele_frequencies.frq"

    script:
    """
    vcftools --vcf ${raw_vcf} \
             --min-alleles 2 --max-alleles 2 \
             --minQ 30 --min-meanDP 10 \
             --recode --recode-INFO-all \
             --out filtered_variants

    vcftools --vcf filtered_variants.recode.vcf \
             --freq2 --out allele_frequencies
    """
}

// Process 5: Analyze Results
process ANALYZE_RESULTS {
    container 'singularity://ubuntu:20.04'
    publishDir params.output_dir, mode: 'copy'

    input:
    path freq_file

    output:
    path "top_10_snps.txt"

    script:
    """
    sort -k5 -nr ${freq_file} | head -n 10 > top_10_snps.txt
    """
}

// Workflow completion handler
workflow.onComplete {
    log.info ( workflow.success ? 
        "Pipeline completed successfully!" : 
        "Pipeline failed. Check the logs." 
    )
}

