// twitcher 0.2.2
//
// Find template switch mutations in genomic data
use std::io::Write;

use crate::common::twitcher_cmd;

mod common;

/// Path of the reference file handed to `twitcher viz` through `--reference`.
const REF: &str = "tests/data/reads/test.fa.gz";
/// Pre-generated CSV from `twitcher reads` on the HiFi BAM fixture.
/// Used as the positional input of every `viz` invocation below.
const FIXTURE_CSV: &str = "tests/data/reads/test_ts.csv";

/// Counts the entries directly inside `dir` whose extension is exactly `svg`.
///
/// The comparison is case-sensitive and does not descend into subdirectories.
///
/// # Panics
///
/// Panics if `dir` cannot be read or if any directory entry fails to load.
fn svg_count(dir: &std::path::Path) -> usize {
    let mut total = 0;
    for entry in std::fs::read_dir(dir).unwrap() {
        let path = entry.unwrap().path();
        if path.extension().is_some_and(|ext| ext == "svg") {
            total += 1;
        }
    }
    total
}

#[test]
fn test_viz_produces_one_svg_per_row() {
    // Every data row of the input CSV should yield exactly one SVG file
    // (viz names each file by the row's id field).
    let out = tempfile::tempdir().unwrap();
    let out_dir = out.path().to_str().unwrap();
    let args = ["viz", FIXTURE_CSV, "--reference", REF, "--output-dir", out_dir];

    let cmd = twitcher_cmd(&args);
    // Dump the tool's stderr so a failing run is diagnosable from the test log.
    eprintln!("{}", cmd.stderr.as_slice().escape_ascii());
    assert!(cmd.status.success());

    // The first line of the fixture is the header, so it is not a data row.
    let csv = std::fs::read_to_string(FIXTURE_CSV).unwrap();
    let expected_rows = csv.lines().skip(1).count();

    let svgs = svg_count(out.path());
    assert_eq!(svgs, expected_rows, "expected one SVG per input row");
}

#[test]
fn test_viz_ids_filter_limits_output() {
    // Passing --ids with a single id should produce only that id's SVG.
    let csv = std::fs::read_to_string(FIXTURE_CSV).unwrap();
    // Line 0 is the header; line 1 is the first data row.
    let first_row = csv.lines().nth(1).expect("CSV must have a data row");
    // The id is taken to be the first comma-separated field of the row.
    let first_id = first_row
        .split(',')
        .next()
        .expect("row must have an id field");

    let out = tempfile::tempdir().unwrap();
    let out_dir = out.path().to_str().unwrap();
    let args = [
        "viz",
        FIXTURE_CSV,
        "--reference",
        REF,
        "--output-dir",
        out_dir,
        "--ids",
        first_id,
    ];

    let cmd = twitcher_cmd(&args);
    // Dump the tool's stderr so a failing run is diagnosable from the test log.
    eprintln!("{}", cmd.stderr.as_slice().escape_ascii());
    assert!(cmd.status.success());

    assert_eq!(
        svg_count(out.path()),
        1,
        "expected exactly one SVG when --ids names a single id"
    );

    // The lone SVG must be the one named after the requested id.
    let expected_name = format!("{first_id}.svg");
    let expected_path = out.path().join(&expected_name);
    assert!(expected_path.exists(), "expected SVG named {expected_name}");
}

#[test]
fn test_viz_svg_content_is_valid_xml() {
    // Smoke check: every generated SVG must begin with an XML declaration or
    // an `<svg` root tag. This is a prefix check only, not a full XML parse.
    let dir = tempfile::tempdir().unwrap();
    let cmd = twitcher_cmd(&[
        "viz",
        FIXTURE_CSV,
        "--reference",
        REF,
        "--output-dir",
        dir.path().to_str().unwrap(),
    ]);
    // Dump the tool's stderr so a failing run is diagnosable from the test log.
    eprintln!("{}", cmd.stderr.as_slice().escape_ascii());
    assert!(cmd.status.success());

    let mut checked = 0usize;
    for entry in std::fs::read_dir(dir.path()).unwrap() {
        let path = entry.unwrap().path();
        if path.extension().is_some_and(|e| e == "svg") {
            let content = std::fs::read_to_string(&path).unwrap();
            assert!(
                content.starts_with("<?xml") || content.starts_with("<svg"),
                "SVG file {:?} does not start with a valid XML/SVG tag",
                path.file_name().unwrap()
            );
            checked += 1;
        }
    }

    // Without this guard the loop above passes vacuously when viz writes no
    // SVG files at all.
    assert!(checked > 0, "viz produced no SVG files to validate");
}

#[test]
fn test_viz_no_overwrite_preserves_existing_svgs() {
    // Without --overwrite, viz skips files that already exist.
    const SENTINEL: &[u8] = b"<!-- sentinel -->";

    let dir = tempfile::tempdir().unwrap();
    let args = [
        "viz",
        FIXTURE_CSV,
        "--reference",
        REF,
        "--output-dir",
        dir.path().to_str().unwrap(),
    ];

    // First run: create SVGs. Check it succeeded so a broken first run is
    // reported as such instead of as a missing-SVG panic further down.
    let first_run = twitcher_cmd(&args);
    eprintln!("{}", first_run.stderr.as_slice().escape_ascii());
    assert!(first_run.status.success(), "initial viz run failed");

    // Pick any one SVG and append a sentinel to mark it as "existing".
    let first_svg = std::fs::read_dir(dir.path())
        .unwrap()
        .map(|entry| entry.unwrap().path())
        .find(|p| p.extension().is_some_and(|e| e == "svg"))
        .expect("expected at least one SVG from first run");
    let mut expected = std::fs::read(&first_svg).unwrap();
    std::fs::OpenOptions::new()
        .append(true)
        .open(&first_svg)
        .unwrap()
        .write_all(SENTINEL)
        .unwrap();
    expected.extend_from_slice(SENTINEL);

    // Second run without --overwrite: the marked file must be left untouched.
    let second_run = twitcher_cmd(&args);
    eprintln!("{}", second_run.stderr.as_slice().escape_ascii());
    assert!(second_run.status.success());

    // Compare exact bytes rather than file length, so any rewrite of the file
    // is detected regardless of the size of the regenerated output.
    let actual = std::fs::read(&first_svg).unwrap();
    assert!(
        actual == expected,
        "SVG was modified despite --overwrite not being set (len {} vs expected {})",
        actual.len(),
        expected.len()
    );
}