rsomics-plink-missing 0.1.0

Per-sample and per-variant genotype missingness from a PLINK1 binary fileset (plink --missing)
Documentation
//! Differential compatibility tests against PLINK 1.9 `--missing`.
//!
//! The comparison is field-level: every whitespace-separated token must match,
//! including the `%g`-formatted F_MISS and the numeric chromosome codes. The
//! field VALUES are byte-identical to PLINK; the inter-column padding is not,
//! because PLINK sizes its id columns from an internal buffer whose width
//! depends on `.bim`/`.fam` load order (see README). On the in-repo golden,
//! which lands in PLINK's clean width regime, the bytes match exactly too, so
//! the golden tests assert byte-for-byte equality. The live differential — run
//! against the upstream `plink` binary when present — asserts field-level
//! equality, which is the durable contract on arbitrary input.

use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};

/// Prefix that the stdout of `plink --version` must start with for the `plink`
/// on PATH to be accepted by `plink_v19` as the reference implementation.
const PLINK_MAJOR: &str = "PLINK v1.9";

/// Path of the `rsomics-plink-missing` executable under test, taken from the
/// `CARGO_BIN_EXE_rsomics-plink-missing` compile-time environment variable.
fn ours() -> PathBuf {
    let exe: &str = env!("CARGO_BIN_EXE_rsomics-plink-missing");
    PathBuf::from(exe)
}

fn golden_dir() -> PathBuf {
    Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/golden")
}

/// Prefix passed as `--bfile`: the `small` entry inside the golden directory.
fn bfile() -> PathBuf {
    let mut prefix = golden_dir();
    prefix.push("small");
    prefix
}

/// Report whether a usable reference `plink` is available.
///
/// Runs `plink --version` and returns `true` only when the process could be
/// run and its stdout starts with `PLINK_MAJOR`; any failure to run the
/// command yields `false`.
fn plink_v19() -> bool {
    match Command::new("plink").arg("--version").output() {
        Ok(probe) => String::from_utf8_lossy(&probe.stdout).starts_with(PLINK_MAJOR),
        Err(_) => false,
    }
}

/// Run ours with `--bfile <golden fileset> --out <out_prefix>` followed by
/// `extra`, returning the (.imiss, .lmiss) text it wrote.
///
/// Paths are handed to the child process as `OsStr` instead of being lossily
/// converted to `String`, so a prefix that is not valid UTF-8 reaches the
/// binary unchanged.
///
/// The output files are located with `Path::with_extension`, which replaces
/// any existing extension on the final component of `out_prefix`; the callers
/// in this file pass dot-free prefixes.
///
/// # Panics
///
/// Panics if the binary cannot be run, if it exits unsuccessfully (the exit
/// status and its stderr are included in the message), or if either output
/// file cannot be read (the path and I/O error are included in the message).
fn run_ours(out_prefix: &Path, extra: &[&str]) -> (String, String) {
    let out = Command::new(ours())
        .arg("--bfile")
        .arg(bfile())
        .arg("--out")
        .arg(out_prefix)
        .args(extra)
        .output()
        .expect("run rsomics-plink-missing");
    assert!(
        out.status.success(),
        "rsomics-plink-missing failed ({}): {}",
        out.status,
        String::from_utf8_lossy(&out.stderr)
    );
    // Name the path and the I/O error on failure so a missing output file is
    // diagnosable from the test log alone.
    let read = |ext: &str| -> String {
        let path = out_prefix.with_extension(ext);
        std::fs::read_to_string(&path)
            .unwrap_or_else(|err| panic!("read {}: {err}", path.display()))
    };
    (read("imiss"), read("lmiss"))
}

/// Create a fresh temporary directory for one test's outputs.
///
/// The directory is created under `$TMPDIR` when that variable can be read,
/// otherwise under `/tmp`, with the name prefix `plink-missing-compat-`.
fn scratch() -> tempfile::TempDir {
    let base = match std::env::var("TMPDIR") {
        Ok(dir) => dir,
        Err(_) => String::from("/tmp"),
    };
    tempfile::Builder::new()
        .prefix("plink-missing-compat-")
        .tempdir_in(base)
        .expect("tempdir")
}

/// Split `text` into rows of whitespace-separated tokens, one row per line.
///
/// Lines containing only whitespace (or nothing) produce no row, so the
/// result never contains an empty token list.
fn fields(text: &str) -> Vec<Vec<&str>> {
    let mut rows = Vec::new();
    for line in text.lines() {
        let tokens: Vec<&str> = line.split_whitespace().collect();
        // A line with no tokens is exactly a blank / whitespace-only line.
        if !tokens.is_empty() {
            rows.push(tokens);
        }
    }
    rows
}

/// Assert that `ours` and `reference` agree token-for-token.
///
/// Both texts are tokenised with `fields`, so differences in inter-column
/// padding and blank lines are ignored. Panics if the row counts differ or on
/// the first row whose tokens differ, printing both rows.
fn assert_fields_equal(ours: &str, reference: &str) {
    let our_rows = fields(ours);
    let ref_rows = fields(reference);
    assert_eq!(our_rows.len(), ref_rows.len(), "row count differs");
    // Lengths were just checked equal, so indexing `ref_rows` cannot go out of bounds.
    for (i, x) in our_rows.iter().enumerate() {
        let y = &ref_rows[i];
        assert_eq!(x, y, "row {i} differs:\n ours: {x:?}\n ref:  {y:?}");
    }
}

/// The `.imiss` output for the golden fileset must equal
/// `small.imiss.golden` byte-for-byte.
#[test]
fn imiss_byte_identical_to_golden() {
    let workdir = scratch();
    let prefix = workdir.path().join("o");
    let (actual, _lmiss) = run_ours(&prefix, &[]);
    let golden_path = golden_dir().join("small.imiss.golden");
    let expected = std::fs::read_to_string(golden_path).unwrap();
    assert_eq!(
        actual, expected,
        "imiss differs byte-for-byte from PLINK golden"
    );
}

/// The `.lmiss` output for the golden fileset must equal
/// `small.lmiss.golden` byte-for-byte.
#[test]
fn lmiss_byte_identical_to_golden() {
    let workdir = scratch();
    let prefix = workdir.path().join("o");
    let (_imiss, actual) = run_ours(&prefix, &[]);
    let golden_path = golden_dir().join("small.lmiss.golden");
    let expected = std::fs::read_to_string(golden_path).unwrap();
    assert_eq!(
        actual, expected,
        "lmiss differs byte-for-byte from PLINK golden"
    );
}

/// Running with `--threads 4` must produce the same `.imiss` and `.lmiss`
/// text as running with `--threads 1`.
#[test]
fn threaded_output_identical_to_single_thread() {
    let workdir = scratch();
    let single = run_ours(&workdir.path().join("t1"), &["--threads", "1"]);
    let multi = run_ours(&workdir.path().join("t4"), &["--threads", "4"]);
    assert_eq!(single.0, multi.0);
    assert_eq!(single.1, multi.1);
}

/// The first line of each output file must carry the expected column names,
/// compared as whitespace-separated tokens.
#[test]
fn header_shape() {
    /// Tokens of the first line of `text`; panics if `text` has no lines.
    fn header(text: &str) -> Vec<&str> {
        text.lines().next().unwrap().split_whitespace().collect()
    }

    let workdir = scratch();
    let (imiss, lmiss) = run_ours(&workdir.path().join("o"), &[]);

    let imiss_header = header(&imiss);
    assert_eq!(
        imiss_header,
        ["FID", "IID", "MISS_PHENO", "N_MISS", "N_GENO", "F_MISS"]
    );

    let lmiss_header = header(&lmiss);
    assert_eq!(lmiss_header, ["CHR", "SNP", "N_MISS", "N_GENO", "F_MISS"]);
}

/// Live differential: run the upstream `plink --missing` on the golden
/// fileset and require our `.imiss` / `.lmiss` to match it token-for-token.
///
/// The test is skipped (with a note on stderr) when `plink_v19` reports that
/// no suitable `plink` is available.
#[test]
fn fields_match_live_plink() {
    if !plink_v19() {
        eprintln!("SKIP: plink (v1.9) not on PATH; skipping live differential");
        return;
    }
    let tmp = scratch();
    let ref_prefix = tmp.path().join("ref");
    // Paths are passed as `OsStr`, so no UTF-8 conversion (and no unwrap) is
    // needed. stdout is discarded, but stderr is captured so that a failing
    // reference run can be diagnosed from the assertion message.
    let reference = Command::new("plink")
        .arg("--bfile")
        .arg(bfile())
        .args(["--missing", "--allow-no-sex", "--out"])
        .arg(&ref_prefix)
        .stdout(Stdio::null())
        .stderr(Stdio::piped())
        .output()
        .expect("run plink");
    assert!(
        reference.status.success(),
        "plink --missing failed ({}): {}",
        reference.status,
        String::from_utf8_lossy(&reference.stderr)
    );

    let (imiss, lmiss) = run_ours(&tmp.path().join("o"), &[]);
    let ref_imiss = std::fs::read_to_string(ref_prefix.with_extension("imiss"))
        .expect("read reference .imiss written by plink");
    let ref_lmiss = std::fs::read_to_string(ref_prefix.with_extension("lmiss"))
        .expect("read reference .lmiss written by plink");
    assert_fields_equal(&imiss, &ref_imiss);
    assert_fields_equal(&lmiss, &ref_lmiss);
}