rustqc 0.2.1

Fast RNA-seq QC in a single pass: dupRadar, featureCounts, 8 RSeQC tools, preseq, samtools stats, and Qualimap — reimplemented in Rust
//! CITATIONS.md generation.
//!
//! Writes a Markdown file alongside results documenting which upstream tools
//! RustQC replicated in this run, their validated versions, and citation info.

use crate::config::RnaConfig;
use anyhow::{Context, Result};
use std::io::Write;
use std::path::Path;

/// Static citation metadata for one upstream tool.
///
/// Each value is rendered as a Markdown section by `write_citation`.
struct Citation {
    /// Text placed after `## ` in the section heading.
    heading: &'static str,
    /// Short summary printed as its own paragraph under the heading.
    description: &'static str,
    /// Bibliographic reference, rendered as a Markdown blockquote.
    reference: &'static str,
    /// Website address, rendered inside `<...>` on the `- Website:` bullet.
    url: &'static str,
    /// Bare DOI; `https://doi.org/` is prepended when the link is rendered.
    doi: &'static str,
}

/// Citation for dupRadar; `write_citations` emits it when
/// `config.any_dupradar_output()` is true.
const DUPRADAR: Citation = Citation {
    heading: "dupRadar (v1.38.0)",
    description: "RustQC replicates the duplication rate analysis of dupRadar.",
    reference: "Sayols S, Scherzinger D, Klein H. dupRadar: a Bioconductor package for the assessment of PCR artifacts in RNA-Seq data. *BMC Bioinformatics*. 2016;17(1):428.",
    url: "https://bioconductor.org/packages/dupRadar/",
    doi: "10.1186/s12859-016-1276-2",
};

/// Citation for featureCounts / Subread; `write_citations` emits it when either
/// `config.any_featurecounts_output()` or `config.any_biotype_output()` is true.
const FEATURECOUNTS: Citation = Citation {
    heading: "featureCounts / Subread (v2.0.6)",
    description: "RustQC produces featureCounts-compatible gene-level read counts.",
    reference: "Liao Y, Smyth GK, Shi W. featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. *Bioinformatics*. 2014;30(7):923-930.",
    url: "http://subread.sourceforge.net/",
    doi: "10.1093/bioinformatics/btt656",
};

/// Citation for RSeQC; `write_citations` emits it when
/// `config.any_rseqc_output()` is true.
const RSEQC: Citation = Citation {
    heading: "RSeQC (v5.0.4)",
    description: "RustQC reimplements eight RSeQC quality control tools.",
    reference: "Wang L, Wang S, Li W. RSeQC: quality control of RNA-seq experiments. *Bioinformatics*. 2012;28(16):2184-2185.",
    url: "https://rseqc.sourceforge.net/",
    doi: "10.1093/bioinformatics/bts356",
};

/// Citation for Preseq; `write_citations` emits it when
/// `config.preseq.enabled` is true.
const PRESEQ: Citation = Citation {
    heading: "Preseq (v3.2.0)",
    description: "RustQC reimplements library complexity estimation from Preseq.",
    reference: "Daley T, Smith AD. Predicting the molecular complexity of sequencing libraries. *Nature Methods*. 2013;10(4):325-327.",
    url: "https://github.com/smithlabcode/preseq",
    doi: "10.1038/nmeth.2375",
};

/// Citation for Samtools; `write_citations` emits it when
/// `config.any_samtools_output()` is true.
const SAMTOOLS: Citation = Citation {
    heading: "Samtools (v1.22.1)",
    description: "RustQC produces Samtools-compatible flagstat, idxstats, and stats output.",
    reference: "Danecek P, Bonfield JK, Liddle J, et al. Twelve years of Samtools and BCFtools. *GigaScience*. 2021;10(2):giab008.",
    url: "http://www.htslib.org/",
    doi: "10.1093/gigascience/giab008",
};

/// Citation for Qualimap; `write_citations` emits it when
/// `config.qualimap.enabled` is true.
const QUALIMAP: Citation = Citation {
    heading: "Qualimap (v2.3)",
    description: "RustQC produces gene body coverage output compatible with Qualimap rnaseq.",
    reference: "Garcia-Alcalde F, Okonechnikov K, Carbonell J, et al. Qualimap: evaluating next-generation sequencing alignment data. *Bioinformatics*. 2012;28(20):2678-2679.",
    url: "http://qualimap.conesalab.org/",
    doi: "10.1093/bioinformatics/bts503",
};

fn write_citation(w: &mut impl Write, c: &Citation) -> std::io::Result<()> {
    writeln!(w, "## {}\n", c.heading)?;
    writeln!(w, "{}\n", c.description)?;
    writeln!(w, "> {}\n", c.reference)?;
    writeln!(w, "- Website: <{}>", c.url)?;
    writeln!(w, "- DOI: [{}](https://doi.org/{})", c.doi, c.doi)?;
    writeln!(w)
}

/// Write a `CITATIONS.md` file listing upstream tools used in this run.
pub fn write_citations(path: &Path, config: &RnaConfig, version: &str, commit: &str) -> Result<()> {
    let file = std::fs::File::create(path)
        .with_context(|| format!("Failed to create citations file: {}", path.display()))?;
    let mut w = std::io::BufWriter::new(file);

    writeln!(w, "# RustQC Citations\n")?;
    writeln!(
        w,
        "This file was generated by [RustQC](https://github.com/seqeralabs/RustQC) v{version} ({commit})."
    )?;
    writeln!(
        w,
        "It documents the upstream tools whose behaviour this run replicated."
    )?;
    writeln!(
        w,
        "Please cite both RustQC and the relevant upstream tools listed below.\n"
    )?;
    writeln!(w, "## RustQC (v{version})\n")?;
    writeln!(w, "- Repository: <https://github.com/seqeralabs/RustQC>\n")?;

    if config.any_dupradar_output() {
        write_citation(&mut w, &DUPRADAR)?;
    }
    if config.any_featurecounts_output() || config.any_biotype_output() {
        write_citation(&mut w, &FEATURECOUNTS)?;
    }
    if config.any_rseqc_output() {
        write_citation(&mut w, &RSEQC)?;
    }
    if config.preseq.enabled {
        write_citation(&mut w, &PRESEQ)?;
    }
    if config.any_samtools_output() {
        write_citation(&mut w, &SAMTOOLS)?;
    }
    if config.qualimap.enabled {
        write_citation(&mut w, &QUALIMAP)?;
    }

    w.flush()?;
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    use std::sync::atomic::{AtomicU64, Ordering};

    /// Sequence number mixed into scratch file names so that every call to
    /// `render_to_string` within this process gets its own file.
    static NEXT_FILE_ID: AtomicU64 = AtomicU64::new(0);

    /// Run `write_citations` against a scratch file in the system temp
    /// directory and return what it wrote.
    fn render_to_string(config: &RnaConfig) -> String {
        let seq = NEXT_FILE_ID.fetch_add(1, Ordering::Relaxed);
        let pid = std::process::id();
        let scratch = std::env::temp_dir().join(format!("rustqc_test_citations_{pid}_{seq}.md"));

        write_citations(&scratch, config, "0.1.0", "abc1234").unwrap();
        let rendered = std::fs::read_to_string(&scratch).unwrap();

        // Best-effort cleanup: a leftover scratch file must not fail the test.
        let _ = std::fs::remove_file(&scratch);
        rendered
    }

    /// Assert that every entry of `needles` occurs in `content`.
    fn assert_has_all(content: &str, needles: &[&str]) {
        for &needle in needles {
            assert!(content.contains(needle), "expected to find `{needle}`");
        }
    }

    /// Assert that no entry of `needles` occurs in `content`.
    fn assert_has_none(content: &str, needles: &[&str]) {
        for &needle in needles {
            assert!(!content.contains(needle), "did not expect `{needle}`");
        }
    }

    /// With the default configuration every upstream tool gets a section.
    #[test]
    fn default_config_includes_all_tools() {
        let content = render_to_string(&RnaConfig::default());

        assert!(content.starts_with("# RustQC Citations"));
        assert_has_all(
            &content,
            &[
                "## dupRadar",
                "## featureCounts",
                "## RSeQC",
                "## Preseq",
                "## Samtools",
                "## Qualimap",
            ],
        );
    }

    /// The version and commit arguments show up in the generated header.
    #[test]
    fn version_and_commit_interpolated() {
        let content = render_to_string(&RnaConfig::default());
        assert_has_all(&content, &["v0.1.0 (abc1234)"]);
    }

    /// Disabling a subset of tools drops only their sections.
    #[test]
    fn disabled_tools_omitted() {
        let yaml = r#"
            dupradar:
                dup_matrix: false
                intercept_slope: false
                density_scatter_plot: false
                boxplot: false
                expression_histogram: false
                multiqc_intercept: false
                multiqc_curve: false
            preseq:
                enabled: false
            qualimap:
                enabled: false
        "#;
        let config: RnaConfig = serde_yaml_ng::from_str(yaml).unwrap();
        let content = render_to_string(&config);

        assert_has_none(&content, &["## dupRadar", "## Preseq", "## Qualimap"]);
        // Not mentioned in the YAML, so these keep their enabled defaults.
        assert_has_all(&content, &["## featureCounts", "## RSeQC", "## Samtools"]);
    }

    /// With every tool switched off only the RustQC header remains.
    #[test]
    fn all_disabled_only_header() {
        let yaml = r#"
            dupradar:
                dup_matrix: false
                intercept_slope: false
                density_scatter_plot: false
                boxplot: false
                expression_histogram: false
                multiqc_intercept: false
                multiqc_curve: false
            featurecounts:
                counts_file: false
                summary_file: false
                biotype_summary_file: false
                biotype_counts: false
                biotype_counts_mqc: false
                biotype_rrna_mqc: false
            bam_stat:
                enabled: false
            infer_experiment:
                enabled: false
            read_duplication:
                enabled: false
            read_distribution:
                enabled: false
            junction_annotation:
                enabled: false
            junction_saturation:
                enabled: false
            inner_distance:
                enabled: false
            tin:
                enabled: false
            flagstat:
                enabled: false
            idxstats:
                enabled: false
            samtools_stats:
                enabled: false
            preseq:
                enabled: false
            qualimap:
                enabled: false
        "#;
        let config: RnaConfig = serde_yaml_ng::from_str(yaml).unwrap();
        let content = render_to_string(&config);

        assert_has_all(&content, &["# RustQC Citations", "## RustQC"]);
        assert_has_none(
            &content,
            &[
                "## dupRadar",
                "## featureCounts",
                "## RSeQC",
                "## Preseq",
                "## Samtools",
                "## Qualimap",
            ],
        );
    }
}