oxo-call 0.11.0

Model-intelligent orchestration for CLI bioinformatics — call any tool with LLM intelligence
Documentation
#!/usr/bin/env nextflow
// =============================================================================
// ChIP-seq workflow — fastp QC → Bowtie2 → Picard MarkDup → filter → MACS3 → bigWig
//
// Usage:
//   nextflow run chipseq.nf --samplesheet samplesheet.csv \
//                           --bowtie2_index /path/to/bt2_index \
//                           --blacklist /path/to/blacklist.bed \
//                           --genome_size hs
//
// Samplesheet CSV format (with header row):
//   sample_id,r1,r2
//   H3K27ac_rep1,/path/R1.fastq.gz,/path/R2.fastq.gz
// =============================================================================

// Opt in to DSL2 syntax (processes are invoked from the `workflow` block below).
nextflow.enable.dsl = 2

// ── Pipeline parameters ───────────────────────────────────────────────────
// Defaults assigned here; the usage header shows them being passed as --<name>.

// CSV with a header row and the columns sample_id, r1, r2; read by the channel setup.
params.samplesheet  = "samplesheet.csv"
// Value handed to `bowtie2 -x`. Defaults to null, so it has to be supplied by the user.
params.bowtie2_index = null
// BED file handed to `bedtools intersect -b` in FILTER_READS. Defaults to null.
params.blacklist    = null
// Value handed to `macs3 callpeak -g`.
params.genome_size  = "hs"
// Root directory under which every process publishes its results (qc/, aligned/, peaks/, ...).
params.outdir       = "results"
// Thread count interpolated into the fastp, bowtie2, samtools view and bamCoverage commands.
params.threads      = 8


// ── Channel setup ──────────────────────────────────────────────────────────
// Parse the samplesheet (header row required) and emit one
// (sample_id, r1, r2) tuple per data row into `reads_ch`.
//
// `checkIfExists: true` makes a missing samplesheet or FASTQ abort the run at
// start-up with the offending path, instead of surfacing later as a staging
// or tool failure inside a task.
Channel
    .fromPath(params.samplesheet, checkIfExists: true)
    .splitCsv(header: true)
    .map { row ->
        // An empty/missing column would otherwise reach file() as null.
        if (!row.sample_id || !row.r1 || !row.r2) {
            error "Samplesheet row is missing sample_id, r1 or r2: ${row}"
        }
        tuple(
            row.sample_id,
            file(row.r1, checkIfExists: true),
            file(row.r2, checkIfExists: true)
        )
    }
    .set { reads_ch }


// ── Processes ─────────────────────────────────────────────────────────────

// Adapter/quality trimming of one paired-end sample with fastp.
//
// Input : (sample_id, r1, r2) — raw paired FASTQ files.
// Output: trimmed — (sample_id, trimmed R1, trimmed R2)
//         json    — fastp JSON report
//         (unnamed) fastp HTML report
process FASTP {
    tag "${sample_id}"
    publishDir "${params.outdir}/qc", mode: 'copy'
    // Reserve as many CPUs as fastp is told to use so the executor does not
    // oversubscribe the machine; a `process.cpus` config setting overrides this.
    cpus params.threads

    input:
    // Raw reads are staged into sub-directories: a raw file that is already
    // named "<sample_id>_R1.fastq.gz" would otherwise share its path with the
    // trimmed output below and be written through the staged symlink.
    tuple val(sample_id), path(r1, stageAs: 'raw_r1/*'), path(r2, stageAs: 'raw_r2/*')

    output:
    tuple val(sample_id), path("${sample_id}_R1.fastq.gz"), path("${sample_id}_R2.fastq.gz"), emit: trimmed
    path "${sample_id}_fastp.json", emit: json
    path "${sample_id}_fastp.html"

    script:
    """
    fastp \\
        --in1 ${r1} --in2 ${r2} \\
        --out1 ${sample_id}_R1.fastq.gz --out2 ${sample_id}_R2.fastq.gz \\
        --html ${sample_id}_fastp.html --json ${sample_id}_fastp.json \\
        --thread ${task.cpus} \\
        --detect_adapter_for_pe \\
        --qualified_quality_phred 20 \\
        --length_required 20
    """
}

// Align trimmed read pairs with Bowtie2 and write a coordinate-sorted,
// indexed BAM.
//
// Input : (sample_id, r1, r2) — trimmed paired FASTQ files.
// Output: bam — (sample_id, sorted BAM, BAM index)
// Reads  params.bowtie2_index (index basename passed to `bowtie2 -x`).
process BOWTIE2_ALIGN {
    tag "${sample_id}"
    publishDir "${params.outdir}/aligned", mode: 'copy'
    // Reserve the CPUs bowtie2 will use; a `process.cpus` config setting overrides this.
    cpus params.threads

    input:
    tuple val(sample_id), path(r1), path(r2)

    output:
    tuple val(sample_id), path("${sample_id}.sorted.bam"), path("${sample_id}.sorted.bam.bai"), emit: bam

    script:
    // The default is null, which would otherwise be rendered as `-x null`.
    if (!params.bowtie2_index) {
        error "BOWTIE2_ALIGN: --bowtie2_index is required (got: ${params.bowtie2_index})"
    }
    // Keep the previous 4 sort threads, but never more than the task was given.
    def sort_threads = Math.min(4, task.cpus as int)
    """
    # Without pipefail a bowtie2 failure is hidden by samtools sort exiting 0
    # on the truncated stream, and the task would "succeed" with a partial BAM.
    set -o pipefail
    bowtie2 \\
        -x ${params.bowtie2_index} \\
        -1 ${r1} -2 ${r2} \\
        -p ${task.cpus} \\
        --no-mixed --no-discordant \\
        | samtools sort -@ ${sort_threads} -o ${sample_id}.sorted.bam -
    samtools index ${sample_id}.sorted.bam
    """
}

// Run Picard MarkDuplicates with REMOVE_DUPLICATES=true on one BAM, then
// index the resulting file.
//
// Input : (sample_id, bam, bai)
// Output: bam     — (sample_id, <sample_id>.markdup.bam, its .bai index)
//         metrics — <sample_id>.markdup_metrics.txt written by Picard
process MARK_DUPLICATES {
    tag "${sample_id}"
    publishDir "${params.outdir}/aligned", mode: 'copy'

    input:
    tuple val(sample_id), path(bam), path(bai)

    output:
    tuple val(sample_id), path("${sample_id}.markdup.bam"), path("${sample_id}.markdup.bam.bai"), emit: bam
    path "${sample_id}.markdup_metrics.txt", emit: metrics

    script:
    // File names are built once so the command and the index step stay in sync.
    def dedup_bam   = "${sample_id}.markdup.bam"
    def metrics_txt = "${sample_id}.markdup_metrics.txt"
    """
    picard MarkDuplicates \\
        I=${bam} \\
        O=${dedup_bam} \\
        M=${metrics_txt} \\
        REMOVE_DUPLICATES=true
    samtools index ${dedup_bam}
    """
}

// Keep only properly paired reads with MAPQ >= 30 (samtools view -f 2 -q 30,
// dropping any read with a flag bit in 1804) and, when a blacklist is given,
// remove reads overlapping it. The result is indexed.
//
// Input : (sample_id, bam, bai)
// Output: bam — (sample_id, <sample_id>.filtered.bam, its .bai index)
// Reads  params.blacklist (BED file; optional — when unset the blacklist
//        step is skipped instead of running `bedtools ... -b null`).
process FILTER_READS {
    tag "${sample_id}"
    publishDir "${params.outdir}/aligned", mode: 'copy'
    // Reserve the CPUs samtools is told to use; a `process.cpus` config setting overrides this.
    cpus params.threads

    input:
    tuple val(sample_id), path(bam), path(bai)

    output:
    tuple val(sample_id), path("${sample_id}.filtered.bam"), path("${sample_id}.filtered.bam.bai"), emit: bam

    script:
    def view_cmd = "samtools view -@ ${task.cpus} -b -F 1804 -f 2 -q 30 ${bam}"
    // Only append the bedtools stage when a blacklist was actually supplied.
    def filter_cmd = params.blacklist
        ? "${view_cmd} | bedtools intersect -v -abam stdin -b ${params.blacklist}"
        : view_cmd
    """
    # Without pipefail a samtools failure would be hidden by the exit status
    # of the last command in the pipe.
    set -o pipefail
    ${filter_cmd} > ${sample_id}.filtered.bam
    samtools index ${sample_id}.filtered.bam
    """
}

// Call narrow peaks (with summits) on one filtered paired-end BAM using
// MACS3 in BAMPE mode, without a control sample.
//
// Input : (sample_id, bam, bai)
// Output: peaks    — <sample_id>_peaks.narrowPeak
//         summits  — <sample_id>_summits.bed
//         xls      — <sample_id>_peaks.xls (optional)
//         bedgraph — <sample_id>_*.bdg tracks requested by -B --SPMR (optional)
// Reads  params.genome_size (passed to `-g`).
process MACS3_CALLPEAK {
    tag "${sample_id}"
    publishDir "${params.outdir}/peaks", mode: 'copy'

    input:
    tuple val(sample_id), path(bam), path(bai)

    output:
    path "${sample_id}_peaks.narrowPeak", emit: peaks
    path "${sample_id}_summits.bed", emit: summits
    // The command asks for these files (-B --SPMR) but they were never
    // declared, so they stayed in the work directory and were not published.
    // Declared optional so the two original outputs remain the only hard requirement.
    path "${sample_id}_peaks.xls", optional: true, emit: xls
    path "${sample_id}_*.bdg", optional: true, emit: bedgraph

    script:
    """
    macs3 callpeak \\
        -t ${bam} -f BAMPE \\
        -n ${sample_id} \\
        --outdir . \\
        -g ${params.genome_size} \\
        -B --SPMR --keep-dup all --call-summits
    """
}

// Produce an RPKM-normalised bigWig coverage track (10 bp bins) from one
// filtered BAM with deepTools bamCoverage.
//
// Input : (sample_id, bam, bai)
// Output: <sample_id>.bw
process BAMCOVERAGE {
    tag "${sample_id}"
    publishDir "${params.outdir}/bigwig", mode: 'copy'
    // Previously the task requested the default single CPU while running
    // bamCoverage with params.threads workers; request what is actually used.
    // A `process.cpus` config setting overrides this value.
    cpus params.threads

    input:
    tuple val(sample_id), path(bam), path(bai)

    output:
    path "${sample_id}.bw"

    script:
    """
    bamCoverage \\
        -b ${bam} -o ${sample_id}.bw \\
        --binSize 10 --normalizeUsing RPKM \\
        --ignoreDuplicates \\
        -p ${task.cpus}
    """
}

// Aggregate every staged QC file into a single MultiQC report.
//
// Input : any number of files, staged into the task directory.
// Output: multiqc_report.html
process MULTIQC {
    publishDir "${params.outdir}/multiqc", mode: 'copy'

    input:
    path('*')

    output:
    path('multiqc_report.html')

    script:
    // Nothing to interpolate, so a plain (non-GString) script is sufficient.
    '''
    multiqc .
    '''
}


// ── Workflow ──────────────────────────────────────────────────────────────

// Entry workflow: trim → align → remove duplicates → filter → peaks + bigWig,
// with the per-sample QC reports aggregated by MultiQC.
workflow {
    // The index defaults to null; stop before any task is submitted rather
    // than letting bowtie2 run with `-x null`.
    if (!params.bowtie2_index) {
        error "Missing required parameter: --bowtie2_index /path/to/bt2_index"
    }

    FASTP(reads_ch)
    BOWTIE2_ALIGN(FASTP.out.trimmed)
    MARK_DUPLICATES(BOWTIE2_ALIGN.out.bam)
    FILTER_READS(MARK_DUPLICATES.out.bam)
    MACS3_CALLPEAK(FILTER_READS.out.bam)
    BAMCOVERAGE(FILTER_READS.out.bam)

    // QC aggregation — fastp JSON reports plus the Picard duplication metrics,
    // which were emitted but never consumed. MultiQC therefore starts once
    // duplicate marking has finished for every sample; peak calling and
    // coverage still run independently of it.
    qc_files = FASTP.out.json
        .mix(MARK_DUPLICATES.out.metrics)
        .collect()
    MULTIQC(qc_files)
}

/*
// nextflow.config (save alongside this .nf file):
process {
    cpus   = 8
    memory = '32 GB'
    time   = '4h'
}
executor {
    name      = 'local'
    cpus      = 32
    memory    = '128 GB'
}
*/