//! twitcher 0.3.0
//!
//! Find template switch mutations in genomic data
use bstr::ByteSlice;

use crate::common::twitcher_cmd;

mod common;

/// Remove ANSI CSI escape sequences (`ESC [ ... <final byte>`) from `line`.
///
/// Everything from the `ESC [` introducer up to and including the first
/// character in `'@'..='~'` is dropped. An `ESC` that is not followed by `[`
/// is kept unchanged.
fn strip_ansi_csi(line: &str) -> String {
    let mut plain = String::with_capacity(line.len());
    let mut chars = line.chars().peekable();
    while let Some(c) = chars.next() {
        if c == '\u{1b}' && chars.peek() == Some(&'[') {
            // Consume the '[' itself first: it lies inside the final-byte range
            // and would otherwise terminate the sequence immediately.
            chars.next();
            for c in chars.by_ref() {
                if ('@'..='~').contains(&c) {
                    break;
                }
            }
        } else {
            plain.push(c);
        }
    }
    plain
}

/// Extract a counter value printed by `--stat-output` / `-v` mode from captured stderr.
///
/// Each line is first stripped of ANSI CSI escape sequences, so colouring around
/// the key, the colon or the number does not hide the counter. The stripped line
/// is then searched for `{key}: ` and the run of ASCII digits directly after the
/// first occurrence is parsed.
///
/// Returns the value from the first line that yields a parseable number, or `0`
/// when no line does (key absent, no digits after it, or the digits overflow
/// `usize`). Invalid UTF-8 in `stderr` is replaced lossily.
fn parse_counter(stderr: &[u8], key: &str) -> usize {
    let text = String::from_utf8_lossy(stderr);
    let needle = format!("{key}: ");
    text.lines()
        .map(strip_ansi_csi)
        .find_map(|line| {
            let (_, after) = line.split_once(needle.as_str())?;
            let digits_end = after
                .find(|c: char| !c.is_ascii_digit())
                .unwrap_or(after.len());
            // An empty or overflowing digit run fails to parse; keep scanning later lines.
            after[..digits_end].parse::<usize>().ok()
        })
        .unwrap_or(0)
}

/// Path of the BAM fixture passed as the input argument of `twitcher reads` in these tests.
const BAM: &str = "tests/data/reads/test.bam";
/// Path of the reference passed via `--reference`
/// (presumably gzip-compressed FASTA, judging by the extension; confirm against the fixture).
const REF: &str = "tests/data/reads/test.fa.gz";

/// Count the CSV data rows on the captured stdout of `output`.
///
/// The first line is treated as the header and is not counted; empty stdout
/// therefore yields `0`.
fn csv_data_rows(output: &std::process::Output) -> usize {
    let stdout = output.stdout.as_bstr();
    // Drop the header line, then count whatever remains.
    stdout.lines().skip(1).count()
}

/// Default `reads` run: the CSV on stdout must contain at least one data row,
/// and no line after the header may be empty.
#[test]
fn test_reads_produces_ts_rows() {
    // No extra flags: only clusters with detected template switches are emitted.
    let cmd = twitcher_cmd(&["reads", BAM, "--reference", REF]);
    eprintln!("{}", cmd.stderr.as_bstr());
    assert!(cmd.status.success());

    let data_rows = csv_data_rows(&cmd);
    assert!(data_rows > 0, "expected at least one TS row in CSV output");

    // Skip the header and require every remaining line to carry some content.
    let no_blank_rows = cmd
        .stdout
        .as_bstr()
        .lines()
        .skip(1)
        .all(|row| !row.is_empty());
    assert!(no_blank_rows, "unexpected empty data line in CSV output");
}

/// `--all-clusters` must report more rows than the TS-only default.
#[test]
fn test_reads_all_clusters_includes_non_ts() {
    // With --all-clusters every cluster is written, whether or not a TS was detected.
    // For the fixture BAM that is 34 rows in total, versus 8 TS rows by default.
    let cmd = twitcher_cmd(&["reads", BAM, "--reference", REF, "--all-clusters"]);
    eprintln!("{}", cmd.stderr.as_bstr());
    assert!(cmd.status.success());

    let total_rows = csv_data_rows(&cmd);
    assert!(
        total_rows > 8,
        "--all-clusters should produce more rows than the TS-only default (8); got {total_rows}"
    );
}

/// `-n 5` must yield fewer TS rows than an unrestricted run.
#[test]
fn test_reads_n_flag_limits_processed_reads() {
    // Only the first 5 reads are processed with -n 5.
    // Without -n the fixture BAM gives 8 TS rows, so the limited run has to stay below that.
    let cmd = twitcher_cmd(&["reads", BAM, "--reference", REF, "-n", "5"]);
    eprintln!("{}", cmd.stderr.as_bstr());
    assert!(cmd.status.success());

    let limited_rows = csv_data_rows(&cmd);
    assert!(
        limited_rows < 8,
        "-n 5 should produce fewer than 8 TS rows; got {limited_rows}"
    );
}

/// `--no-ts` must leave the default (TS-only) CSV without any data rows.
#[test]
fn test_reads_no_ts_suppresses_output() {
    // TS detection is switched off, so the TS-only default has nothing to report.
    let cmd = twitcher_cmd(&["reads", BAM, "--reference", REF, "--no-ts"]);
    eprintln!("{}", cmd.stderr.as_bstr());
    assert!(cmd.status.success());

    let data_rows = csv_data_rows(&cmd);
    assert_eq!(data_rows, 0, "expected no TS rows when --no-ts is set");
}

/// `--fpa` must still report at least one TS row.
#[test]
fn test_reads_fpa_detects_template_switches() {
    // --fpa selects the Four-Point-Aligner; TS rows are still expected in the output.
    let cmd = twitcher_cmd(&["reads", BAM, "--reference", REF, "--fpa"]);
    eprintln!("{}", cmd.stderr.as_bstr());
    assert!(cmd.status.success());

    let fpa_rows = csv_data_rows(&cmd);
    assert!(fpa_rows > 0, "expected TS rows with --fpa; got {fpa_rows}");
}

/// `-o <path>` must create a CSV file holding a non-empty header and at least one data row.
#[test]
fn test_reads_output_file_flag() {
    // Redirect the CSV from stdout into a file inside a throwaway directory.
    let scratch = tempfile::tempdir().unwrap();
    let out_file = scratch.path().join("out.csv");
    let out_arg = out_file.to_str().unwrap();

    let cmd = twitcher_cmd(&["reads", BAM, "--reference", REF, "-o", out_arg]);
    eprintln!("{}", cmd.stderr.as_bstr());
    assert!(cmd.status.success());
    assert!(out_file.exists(), "output CSV file was not created");

    // First line is the header; anything after it is data.
    let csv_text = std::fs::read_to_string(&out_file).unwrap();
    let mut rows = csv_text.lines();
    let header = rows.next().expect("CSV must have a header row");
    assert!(!header.is_empty());
    let first_data_row = rows.next();
    assert!(first_data_row.is_some(), "expected at least one data row");
}

/// Exercises `--db` caching across four runs that differ only in the `-m` memory limit:
/// two runs at `1mb` followed by two runs at `1gb`, checking the
/// `alignments.*` counters printed in `-v` mode after each run.
#[test]
fn test_db_retries_oom_on_larger_memory() {
    let dir = tempfile::tempdir().unwrap();
    let db_path = dir.path().join("cache.db");
    let db_str = db_path.to_str().unwrap();

    // Every invocation shares the same arguments except for the memory limit.
    let run_with_memory = |memory: &str| {
        twitcher_cmd(&[
            "reads",
            BAM,
            "--reference",
            REF,
            "-v",
            "-m",
            memory,
            "--threads",
            "12",
            "-n",
            "10",
            "--db",
            db_str,
        ])
    };

    // Run 1: a tiny memory limit makes alignments hit OOM; the failures land in the DB.
    let run1 = run_with_memory("1mb");
    eprintln!("run1 stderr:\n{}", String::from_utf8_lossy(&run1.stderr));
    assert!(run1.status.success());
    let computed_first = parse_counter(&run1.stderr, "alignments.computations");
    let oom_first = parse_counter(&run1.stderr, "alignments.results.failed.oom");
    assert!(computed_first > 0, "run 1 should compute alignments");
    assert!(
        oom_first > 0,
        "run 1 should produce OOM failures with tiny memory; computations={computed_first}"
    );

    // Run 2: same settings again, so the DB answers everything and nothing is recomputed.
    let run2 = run_with_memory("1mb");
    eprintln!("run2 stderr:\n{}", String::from_utf8_lossy(&run2.stderr));
    assert!(run2.status.success());
    let computed_second = parse_counter(&run2.stderr, "alignments.computations");
    let served_second = parse_counter(&run2.stderr, "alignments.from_db");
    assert_eq!(
        computed_second, 0,
        "run 2 (same memory) should not recompute anything; from_db={served_second}"
    );
    assert!(served_second > 0, "run 2 should serve results from DB");

    // Run 3: with more memory the cached OOM failures must be retried, not served.
    let run3 = run_with_memory("1gb");
    eprintln!("run3 stderr:\n{}", String::from_utf8_lossy(&run3.stderr));
    assert!(run3.status.success());
    let computed_third = parse_counter(&run3.stderr, "alignments.computations");
    assert!(
        computed_third > 0,
        "run 3 (larger memory) should retry cached OOM failures; from_db2={served_second}"
    );

    // Run 4: same settings as run 3; the retried results are cached now, so no recomputation.
    let run4 = run_with_memory("1gb");
    eprintln!("run4 stderr:\n{}", String::from_utf8_lossy(&run4.stderr));
    assert!(run4.status.success());
    let computed_fourth = parse_counter(&run4.stderr, "alignments.computations");
    let served_fourth = parse_counter(&run4.stderr, "alignments.from_db");
    assert_eq!(
        computed_fourth, 0,
        "run 4 should serve everything from DB after retry; from_db={served_fourth}"
    );
    assert!(served_fourth > 0, "run 4 should find retried results in DB");
}