#!/usr/bin/env nextflow

nextflow.enable.dsl=2

// Pipeline parameters. Every assignment below is a default value that can be
// overridden when the pipeline is launched.
// Reference FASTA, directory scanned for paired reads, and root of all results.
params.ref_genome = "/var/www/html/anshu/RAPID-engine/reference_genome/acinetobacter_genome.fa"
params.reads_dir = "/var/www/html/anshu/RAPID-engine/uploads"
params.output_dir = "/var/www/html/anshu/RAPID-engine/result"
// Thread count handed to fastp (-w) and bwa mem (-t).
params.threads = 32
// Per-step publish directories, each derived from params.output_dir.
params.qc_dir = "${params.output_dir}/qc"
params.trim_dir = "${params.output_dir}/trimmed"
params.trimqc_dir = "${params.output_dir}/trimmed_qc"
// NOTE(review): params.index_dir is not referenced by any process in this
// script (INDEX_REFERENCE publishes to "${params.output_dir}/reference").
params.index_dir = "${params.output_dir}/index"
params.call_variants_dir = "${params.output_dir}/call_variants"
params.filter_variants_dir = "${params.output_dir}/filter_variants"
params.stat_dir = "${params.output_dir}/stat_analyze"

// Start-up banner: logs the reference, reads directory, output directory and
// thread count that this run will use.
log.info """
    A C I N E T O B A C T E R   A N A L Y S I S   P I P E L I N E    
    ===================================================
    Reference Genome: ${params.ref_genome}
    Reads Directory : ${params.reads_dir}
    Output Directory: ${params.output_dir}
    Threads         : ${params.threads}
    """

// Process definitions
// FASTQC_RAW: run FastQC on the raw (untrimmed) read files of one sample.
//   input : (sample name, read file(s)) tuple
//   output: the FastQC zip/html reports named "<sample>_*_fastqc.*",
//           copied into params.qc_dir
process FASTQC_RAW {
    container 'staphb/fastqc:0.11.9'
    publishDir path: params.qc_dir, mode: 'copy'

    input:
    tuple val(sample), path(fastq_files)

    output:
    path "${sample}_*_fastqc.{zip,html}"

    script:
    // Reports are written into the task work directory so the output glob
    // above can collect them.
    """
    fastqc --outdir . ${fastq_files}
    """
}

// FASTP: process one sample's paired-end reads with fastp.
//   input : (sample name, [mate 1, mate 2]) tuple
//   output: trimmed_reads -> (sample name, trimmed mate 1, trimmed mate 2)
//           plus the fastp JSON and HTML reports; all copied to params.trim_dir
process FASTP {
    container 'staphb/fastp:0.23.2'
    publishDir path: params.trim_dir, mode: 'copy'

    input:
    tuple val(sample), path(reads)

    output:
    tuple val(sample), path("${sample}_1.trimmed.fastq.gz"), path("${sample}_2.trimmed.fastq.gz"), emit: trimmed_reads
    path "${sample}_fastp.{json,html}"

    script:
    // The first staged file is used as mate 1 and the second as mate 2.
    def mate1 = reads[0]
    def mate2 = reads[1]
    """
    fastp \
        --in1 ${mate1} \
        --in2 ${mate2} \
        --out1 ${sample}_1.trimmed.fastq.gz \
        --out2 ${sample}_2.trimmed.fastq.gz \
        --json ${sample}_fastp.json \
        --html ${sample}_fastp.html \
        --thread ${params.threads}
    """
}

// FASTQC_TRIMMED: run FastQC on the two trimmed read files of one sample.
//   input : (sample name, trimmed mate 1, trimmed mate 2) tuple
//   output: FastQC zip/html reports whose names start with the sample name,
//           copied into params.trimqc_dir
process FASTQC_TRIMMED {
    container 'staphb/fastqc:0.11.9'
    publishDir path: params.trimqc_dir, mode: 'copy'

    input:
    tuple val(sample), path(trimmed_fwd), path(trimmed_rev)

    output:
    path "${sample}*_fastqc.{zip,html}"

    script:
    """
    fastqc --outdir . ${trimmed_fwd} ${trimmed_rev}
    """
}

// INDEX_REFERENCE: build the BWA index for the reference FASTA.
//   input : the reference FASTA file
//   output: index_files -> (reference FASTA, list of BWA index files
//           *.amb/*.ann/*.bwt/*.pac/*.sa); published to
//           "${params.output_dir}/reference"
//
// Only `bwa index` is run here. A samtools FASTA index (.fai) is deliberately
// not built in this process: it is not part of the declared outputs, so no
// downstream process would receive it, and this process runs in a bwa image.
// NOTE(review): the bwa image is not expected to ship samtools — confirm; if a
// .fai is needed downstream, build it in a process that uses a samtools image
// and declare it as an output.
process INDEX_REFERENCE {
    container 'staphb/bwa:0.7.17'
    publishDir "${params.output_dir}/reference", mode: 'copy'

    input:
    path reference

    output:
    tuple path(reference), path("*.{amb,ann,bwt,pac,sa}"), emit: index_files

    script:
    """
    bwa index ${reference}
    """
}

// ALIGN_READS: align one sample's trimmed read pair to the indexed reference
// and produce a sorted, indexed BAM.
//   input : (sample name, trimmed mate 1, trimmed mate 2) tuple, and the
//           (reference FASTA, BWA index files) tuple from INDEX_REFERENCE
//   output: (sample name, sorted BAM, BAM index); published to
//           "${params.output_dir}/aligned"
//
// NOTE(review): the script calls samtools, but the container is a bwa image —
// confirm that image actually provides samtools, otherwise switch this process
// to an image that bundles both tools.
process ALIGN_READS {
    container 'staphb/bwa:0.7.17'
    publishDir "${params.output_dir}/aligned", mode: 'copy'

    input:
    tuple val(sample_id), path(reads1), path(reads2)
    tuple path(ref_genome), path(index_files)

    output:
    tuple val(sample_id), path("${sample_id}.bam"), path("${sample_id}.bam.bai")

    script:
    // pipefail: without it the pipeline's exit status is only that of the last
    // command (samtools sort), so a failing `bwa mem` could leave a truncated
    // BAM behind while the task is still reported as successful.
    """
    set -o pipefail
    bwa mem -t ${params.threads} ${ref_genome} ${reads1} ${reads2} | \
    samtools view -bS - | \
    samtools sort -o ${sample_id}.bam
    samtools index ${sample_id}.bam
    """
}

// CALL_VARIANTS: call variants for one sample with bcftools mpileup + call.
//   input : (reference FASTA, sample name, BAM file) tuple
//   output: (sample name, "<sample>.variants.vcf"); published to
//           params.call_variants_dir
process CALL_VARIANTS {
    container 'staphb/bcftools:1.13'
    publishDir params.call_variants_dir, mode: 'copy'

    input:
    tuple path(ref_genome), val(sample_id), path(bam_file)

    output:
    tuple val(sample_id), path("${sample_id}.variants.vcf")

    script:
    // pipefail: without it only the exit status of `bcftools call` is checked,
    // so a failing `bcftools mpileup` could yield an incomplete VCF that is
    // still treated as a successful result.
    // call flags: -m multiallelic caller, -v variant sites only, -Ov plain VCF.
    """
    set -o pipefail
    bcftools mpileup -f ${ref_genome} ${bam_file} | \
    bcftools call -mv -Ov -o ${sample_id}.variants.vcf
    """
}

// PROCESS_VCF: sort one sample's VCF with bcftools sort.
//   input : (sample name, VCF file) tuple
//   output: (sample name, "<sample>.processed.vcf"); published to
//           params.call_variants_dir
process PROCESS_VCF {
    container 'staphb/bcftools:1.13'
    publishDir path: params.call_variants_dir, mode: 'copy'

    input:
    tuple val(sample), path(raw_vcf)

    output:
    tuple val(sample), path("${sample}.processed.vcf")

    script:
    """
    bcftools sort -o ${sample}.processed.vcf ${raw_vcf}
    """
}

// FILTER_VARIANTS: keep only the records that satisfy the include expression
// QUAL>30 && DP>10.
//   input : (sample name, VCF file) tuple
//   output: (sample name, "<sample>.filtered_variants.vcf"); published to
//           params.filter_variants_dir
process FILTER_VARIANTS {
    container 'staphb/bcftools:1.13'
    publishDir path: params.filter_variants_dir, mode: 'copy'

    input:
    tuple val(sample), path(sorted_vcf)

    output:
    tuple val(sample), path("${sample}.filtered_variants.vcf")

    script:
    """
    bcftools filter -o ${sample}.filtered_variants.vcf -i 'QUAL>30 && DP>10' ${sorted_vcf}
    """
}

// ANALYZE_RESULTS: write the `bcftools stats` report for one sample's VCF.
//   input : (sample name, VCF file) tuple
//   output: "<sample>.variant_stats.txt"; published to params.stat_dir
process ANALYZE_RESULTS {
    container 'staphb/bcftools:1.13'
    publishDir path: params.stat_dir, mode: 'copy'

    input:
    tuple val(sample), path(filtered_vcf)

    output:
    path "${sample}.variant_stats.txt"

    script:
    // bcftools stats prints to stdout, so the report is captured by redirection.
    """
    bcftools stats ${filtered_vcf} > ${sample}.variant_stats.txt
    """
}

// Main workflow
// Entry workflow: raw QC -> trimming -> trimmed QC -> reference indexing ->
// alignment -> variant calling -> sorting -> filtering -> statistics.
workflow {
    // Resolve the reference once into a file object. This fails fast with a
    // clear error when the file is missing, and also accepts a relative
    // --ref_genome (a plain relative string is rejected by a `path` input).
    ref_genome = file(params.ref_genome, checkIfExists: true)

    // Create input channel for paired-end read files; each item is
    // (sample name, [mate 1, mate 2]).
    read_pairs = Channel
        .fromFilePairs("${params.reads_dir}/*_{1,2}.fastq*.gz", checkIfExists: true)
        .ifEmpty { error "No matching read files found in ${params.reads_dir}" }

    // Run QC on raw reads
    FASTQC_RAW(read_pairs)

    // Trim reads and get QC
    FASTP(read_pairs)
    trimmed_reads = FASTP.out.trimmed_reads
    FASTQC_TRIMMED(trimmed_reads)

    // Index reference and align reads
    INDEX_REFERENCE(ref_genome)
    ALIGN_READS(trimmed_reads, INDEX_REFERENCE.out.index_files)

    // Prepare input for variant calling: CALL_VARIANTS expects
    // (reference, sample name, BAM); the BAM index is not passed on.
    variants_input = ALIGN_READS.out.map { sample_id, bam, bai ->
        tuple(ref_genome, sample_id, bam)
    }

    // Variant calling and processing
    CALL_VARIANTS(variants_input)
    PROCESS_VCF(CALL_VARIANTS.out)
    FILTER_VARIANTS(PROCESS_VCF.out)
    ANALYZE_RESULTS(FILTER_VARIANTS.out)
}

// Completion handler
// Completion handler: log a one-line summary chosen by the run's success flag.
workflow.onComplete {
    def summary
    if (workflow.success) {
        summary = "\nPipeline completed successfully!"
    } else {
        summary = "\nPipeline failed. Check the logs."
    }
    log.info(summary)
}

