oxo-call 0.11.0

Model-intelligent orchestration for CLI bioinformatics — call any tool with LLM intelligence
Documentation
#!/usr/bin/env nextflow
// =============================================================================
// WGBS methylation workflow — Trim Galore QC → Bismark alignment → dedup → methylation extraction
//
// Usage:
//   nextflow run methylseq.nf --samplesheet samplesheet.csv \
//                              --bismark_index /path/to/bismark_genome
//
// Samplesheet CSV format (with header row):
//   sample_id,r1,r2
//   sample1,/path/R1.fastq.gz,/path/R2.fastq.gz
// =============================================================================

// Use the DSL2 syntax (named processes wired together in a `workflow` block).
nextflow.enable.dsl = 2

// Pipeline parameters; each can be overridden on the command line as --<name> <value>.

// CSV with a header row and the columns sample_id, r1, r2.
params.samplesheet = 'samplesheet.csv'

// Bismark genome folder, handed to `bismark --genome` and to
// `bismark_methylation_extractor --genome_folder`. No default is provided.
params.bismark_index = null

// Root directory under which every process publishes its results.
params.outdir = 'results'

// Thread / parallelism count forwarded to the individual tools.
params.threads = 8


// ── Channel setup ──────────────────────────────────────────────────────────
// Read the samplesheet and emit one (sample_id, R1 file, R2 file) tuple per CSV row.
Channel
    .fromPath(params.samplesheet)
    .splitCsv(header: true)
    .map { rec ->
        def fwd_reads = file(rec.r1)
        def rev_reads = file(rec.r2)
        tuple(rec.sample_id, fwd_reads, rev_reads)
    }
    .set { reads_ch }


// ── Processes ─────────────────────────────────────────────────────────────

// Adapter/quality trimming of one paired-end sample with Trim Galore, followed by
// FastQC on the trimmed reads (--fastqc).
//
// Input : (sample_id, R1 FASTQ, R2 FASTQ); inputs are assumed to be gzip-compressed,
//         because the declared outputs end in .fq.gz.
// Output: trimmed -> (sample_id, trimmed R1, trimmed R2)
//         fastqc  -> FastQC zip of the trimmed R1 reads
process TRIM_GALORE {
    tag "${sample_id}"
    publishDir "${params.outdir}/trimmed", mode: 'copy'

    input:
    tuple val(sample_id), path(r1), path(r2)

    output:
    tuple val(sample_id), path("${sample_id}_R1_val_1.fq.gz"), path("${sample_id}_R2_val_2.fq.gz"), emit: trimmed
    path "${sample_id}_R1_val_1_fastqc.zip", emit: fastqc

    script:
    """
    trim_galore \\
        --paired --cores ${params.threads} \\
        --fastqc --illumina \\
        --output_dir . \\
        ${r1} ${r2}

    # Trim Galore (and the FastQC run it triggers) name their outputs after the
    # *input* file names, whatever extension those had (.fastq.gz, .fq.gz, ...).
    # Locate the products by their fixed suffix and rename them to the
    # sample-based names declared in the output block. The guard avoids the
    # "are the same file" failure of mv when a name is already correct.
    rename_output() {
        [ "\$1" = "\$2" ] || mv "\$1" "\$2"
    }
    rename_output "\$(ls *_val_1.fq.gz)"      "${sample_id}_R1_val_1.fq.gz"
    rename_output "\$(ls *_val_2.fq.gz)"      "${sample_id}_R2_val_2.fq.gz"
    rename_output "\$(ls *_val_1_fastqc.zip)" "${sample_id}_R1_val_1_fastqc.zip"
    """
}

// Align one trimmed paired-end sample against the bisulfite genome with Bismark
// (Bowtie 2 backend, non-directional mode).
//
// Input : (sample_id, trimmed R1, trimmed R2)
// Output: bam    -> (sample_id, paired-end Bismark BAM)
//         report -> Bismark paired-end alignment report
process BISMARK_ALIGN {
    tag "${sample_id}"
    publishDir "${params.outdir}/aligned", mode: 'copy'

    input:
    tuple val(sample_id), path(r1), path(r2)

    output:
    tuple val(sample_id), path("${sample_id}_bismark_bt2_pe.bam"), emit: bam
    path "${sample_id}_bismark_bt2_PE_report.txt", emit: report

    script:
    // --parallel counts whole Bismark instances, not threads. In
    // --non_directional mode every instance starts four Bowtie 2 processes, so
    // the thread budget is divided by four (never below one instance) to avoid
    // oversubscribing cores and memory.
    def instances = Math.max(1, (params.threads as int).intdiv(4))
    """
    bismark \\
        --genome ${params.bismark_index} \\
        -1 ${r1} -2 ${r2} \\
        --output_dir . \\
        --parallel ${instances} \\
        --non_directional

    # Bismark names its results after the read-1 file
    # (<read1 stem>_bismark_bt2_pe.bam / _PE_report.txt); --prefix would only
    # prepend "<prefix>." to that. Rename to the sample-based names declared in
    # the output block, skipping the move when the name already matches.
    rename_output() {
        [ "\$1" = "\$2" ] || mv "\$1" "\$2"
    }
    rename_output "\$(ls *_bismark_bt2_pe.bam)"        "${sample_id}_bismark_bt2_pe.bam"
    rename_output "\$(ls *_bismark_bt2_PE_report.txt)" "${sample_id}_bismark_bt2_PE_report.txt"
    """
}

// Run deduplicate_bismark in paired-end mode on one sample's Bismark BAM.
//
// Input : (sample_id, BAM)
// Output: bam    -> (sample_id, deduplicated BAM)
//         report -> deduplication report
process BISMARK_DEDUP {
    tag "${sample_id}"
    publishDir "${params.outdir}/deduped", mode: 'copy'

    input:
    tuple val(sample_id), path(bam)

    output:
    tuple val(sample_id), path("${sample_id}_bismark_bt2_pe.deduplicated.bam"), emit: bam
    path "${sample_id}_bismark_bt2_pe.deduplication_report.txt", emit: report

    script:
    // Results are written into the task directory under names derived from the input BAM.
    """
    deduplicate_bismark --paired --output_dir . ${bam}
    """
}

// Sort a BAM with samtools and build its .bai index.
//
// Input : (sample_id, BAM)
// Output: bam -> (sample_id, sorted BAM, BAM index)
process SORT_INDEX {
    tag "${sample_id}"
    publishDir "${params.outdir}/deduped", mode: 'copy'

    input:
    tuple val(sample_id), path(bam)

    output:
    tuple val(sample_id), path("${sample_id}.sorted.bam"), path("${sample_id}.sorted.bam.bai"), emit: bam

    script:
    // Name of the sorted BAM; the index is created next to it as <name>.bai.
    def sorted_bam = "${sample_id}.sorted.bam"
    """
    samtools sort -@ ${params.threads} -o ${sorted_bam} ${bam}
    samtools index ${sorted_bam}
    """
}

// Extract per-cytosine methylation calls (all contexts) from one sample's
// paired-end Bismark alignments and build the bedGraph, coverage and
// genome-wide cytosine report.
//
// Input : (sample_id, BAM, BAM index). The index is staged but not used here.
// Output: bedgraph        -> gzipped bedGraph
//         (unnamed)       -> Bismark coverage file
//         report          -> splitting report
//         cytosine_report -> genome-wide CX report (optional)
process METHYLATION_EXTRACT {
    tag "${sample_id}"
    publishDir "${params.outdir}/methyl", mode: 'copy'

    input:
    tuple val(sample_id), path(bam), path(bai)

    output:
    path "${sample_id}.sorted.bedGraph.gz", emit: bedgraph
    path "${sample_id}.sorted.bismark.cov.gz"
    path "${sample_id}.sorted_splitting_report.txt", emit: report
    path "${sample_id}.sorted.CX_report.txt*", emit: cytosine_report, optional: true

    script:
    """
    # The workflow feeds this process the BAM produced by SORT_INDEX, which is
    # sorted by position. For paired-end data the extractor needs read 1 and
    # read 2 of a pair on consecutive lines and refuses position-sorted files,
    # so re-sort by read name first. The copy lives in a sub-directory and
    # keeps the "<sample_id>.sorted.bam" name so that all extractor outputs
    # (named after the BAM's base name) match the output block above.
    mkdir -p name_sorted
    samtools sort -n -@ ${params.threads} -o name_sorted/${sample_id}.sorted.bam ${bam}

    bismark_methylation_extractor \\
        --paired-end --comprehensive --CX_context \\
        --cytosine_report \\
        --genome_folder ${params.bismark_index} \\
        --output_dir . \\
        --parallel ${params.threads} \\
        name_sorted/${sample_id}.sorted.bam
    """
}

// Aggregate every staged QC file into a single MultiQC HTML report.
//
// Input : any number of files, staged into the task directory
// Output: multiqc_report.html
process MULTIQC {
    publishDir "${params.outdir}/multiqc", mode: 'copy'

    input:
    path '*'

    output:
    path 'multiqc_report.html'

    script:
    // MultiQC scans the task directory, i.e. exactly the staged inputs.
    '''
    multiqc .
    '''
}


// ── Workflow ──────────────────────────────────────────────────────────────

// Entry workflow: trim -> align -> deduplicate -> sort/index -> methylation
// extraction, with MultiQC aggregating the FastQC results of the trimmed reads.
workflow {
    // params.bismark_index has no usable default. Stop before any task is
    // launched instead of letting Bismark fail later on a literal "null" path.
    if (!params.bismark_index) {
        error "Missing required parameter: --bismark_index /path/to/bismark_genome"
    }

    TRIM_GALORE(reads_ch)
    BISMARK_ALIGN(TRIM_GALORE.out.trimmed)
    BISMARK_DEDUP(BISMARK_ALIGN.out.bam)
    SORT_INDEX(BISMARK_DEDUP.out.bam)
    METHYLATION_EXTRACT(SORT_INDEX.out.bam)

    // QC aggregation — depends only on trimming, so it runs in parallel with
    // Bismark alignment once every sample's FastQC zip has been collected.
    qc_files = TRIM_GALORE.out.fastqc.collect()
    MULTIQC(qc_files)
}

/*
// nextflow.config:
process {
    cpus   = 8
    memory = '64 GB'
    time   = '8h'
}
executor {
    name      = 'local'
    cpus      = 32
    memory    = '256 GB'
}
*/