rsomics-plink-missing 0.1.0

Per-sample and per-variant genotype missingness from a PLINK1 binary fileset (plink --missing)
Documentation
use anyhow::{Context, bail};
use clap::Parser;
use rsomics_pgen::Pgen;
use rsomics_plink_missing::{missing, write_imiss, write_lmiss};
use std::fs::File;
use std::io::BufWriter;
use std::path::{Path, PathBuf};
use std::process::ExitCode;

// Command-line arguments, parsed through clap's derive API.
//
// Maintainer notes in this struct use plain `//` comments on purpose: clap's
// derive turns the `///` comments on the fields into the `--help` text, so
// those are user-facing strings rather than internal documentation.
//
// The input fileset is given either as `--bfile <prefix>` or as the explicit
// `--bed`/`--bim`/`--fam` trio. The attributes below make `--bfile` conflict
// with the trio and make `--bed` require both `--bim` and `--fam`. Passing
// `--bim` or `--fam` without `--bed`, or passing no input at all, is not
// rejected here; `load` reports that case.
#[derive(Parser)]
#[command(
    name = "rsomics-plink-missing",
    about = "Per-sample and per-variant genotype missingness from a PLINK1 fileset (plink --missing)",
    version
)]
struct Cli {
    /// Path prefix for the .bed/.bim/.fam fileset (without extension).
    #[arg(long, conflicts_with_all = ["bed", "bim", "fam"])]
    bfile: Option<PathBuf>,

    /// Explicit .bed path (requires --bim and --fam).
    #[arg(long, requires_all = ["bim", "fam"])]
    bed: Option<PathBuf>,

    /// Explicit .bim path.
    #[arg(long)]
    bim: Option<PathBuf>,

    /// Explicit .fam path.
    #[arg(long)]
    fam: Option<PathBuf>,

    // When absent, `run` writes both reports to stdout instead of to files.
    /// Write reports to <OUT>.imiss and <OUT>.lmiss instead of stdout.
    #[arg(long)]
    out: Option<PathBuf>,

    // Forwarded unchanged to `missing` as its second argument.
    /// Worker threads for the missingness scan.
    #[arg(short, long, default_value_t = 1)]
    threads: usize,
}

/// Process entry point: parses the command line, delegates to [`run`], and
/// maps the outcome to an exit status.
///
/// On failure the full error chain is printed to stderr (alternate `{:#}`
/// formatting) and the process exits with a failure code.
fn main() -> ExitCode {
    let outcome = run(Cli::parse());
    if let Err(e) = outcome {
        eprintln!("error: {e:#}");
        return ExitCode::FAILURE;
    }
    ExitCode::SUCCESS
}

fn run(cli: Cli) -> anyhow::Result<()> {
    let pgen = load(&cli)?;
    let report = missing(&pgen, cli.threads);

    match &cli.out {
        Some(prefix) => {
            let mut imiss = BufWriter::new(File::create(prefix.with_extension("imiss"))?);
            write_imiss(&report.samples, &mut imiss)?;
            let mut lmiss = BufWriter::new(File::create(prefix.with_extension("lmiss"))?);
            write_lmiss(&report.variants, &mut lmiss)?;
        }
        None => {
            let stdout = std::io::stdout();
            let mut w = BufWriter::new(stdout.lock());
            write_imiss(&report.samples, &mut w)?;
            write_lmiss(&report.variants, &mut w)?;
        }
    }
    Ok(())
}

/// Builds the [`Pgen`] from whichever input form was supplied.
///
/// A `--bfile` prefix takes precedence and is handed straight to
/// `Pgen::load`. Otherwise all three of `--bed`, `--bim` and `--fam` must be
/// present and are routed through [`load_explicit`]. Any other combination is
/// reported as an error.
fn load(cli: &Cli) -> anyhow::Result<Pgen> {
    match (&cli.bfile, &cli.bed, &cli.bim, &cli.fam) {
        (Some(prefix), ..) => {
            Pgen::load(prefix).with_context(|| format!("loading {}", prefix.display()))
        }
        (None, Some(bed), Some(bim), Some(fam)) => load_explicit(bed, bim, fam),
        _ => bail!("provide --bfile <prefix> or all of --bed/--bim/--fam"),
    }
}

/// Loads a fileset whose three members live at unrelated paths.
///
/// The files are exposed under a common `fs.{bed,bim,fam}` prefix inside a
/// fresh temporary directory, and that prefix is then passed to `Pgen::load`.
///
/// NOTE(review): the temporary directory handle goes out of scope when this
/// function returns. This presumably relies on `Pgen::load` having finished
/// with (or still holding open) the files by then — confirm against the
/// loader.
fn load_explicit(bed: &Path, bim: &Path, fam: &Path) -> anyhow::Result<Pgen> {
    let scratch = tempfile::Builder::new()
        .prefix("rsomics-plink-missing-")
        .tempdir()
        .context("temp dir")?;
    let stem = scratch.path().join("fs");

    // Same order as the fileset convention: genotypes, variants, samples.
    for (src, ext) in [(bed, "bed"), (bim, "bim"), (fam, "fam")] {
        symlink_or_copy(src, &stem.with_extension(ext))?;
    }

    Pgen::load(&stem).context("loading explicit fileset")
}

/// Unix variant: makes `dst` a symbolic link to the canonicalized `src`.
///
/// Canonicalizing first means the link target is absolute, so it resolves
/// regardless of where `dst` lives. Errors carry the source path (when
/// canonicalization fails) or the destination path (when linking fails).
#[cfg(unix)]
fn symlink_or_copy(src: &Path, dst: &Path) -> anyhow::Result<()> {
    use std::os::unix::fs::symlink;

    let target = src
        .canonicalize()
        .with_context(|| src.display().to_string())?;
    symlink(&target, dst).with_context(|| format!("symlink {}", dst.display()))
}

/// Non-Unix variant: copies `src` to `dst`.
///
/// The number of bytes copied is discarded; a failure is reported with the
/// destination path as context.
#[cfg(not(unix))]
fn symlink_or_copy(src: &Path, dst: &Path) -> anyhow::Result<()> {
    std::fs::copy(src, dst)
        .map(|_bytes| ())
        .with_context(|| format!("copy {}", dst.display()))
}

#[cfg(test)]
mod tests {
    use super::*;
    use clap::CommandFactory;

    /// Runs clap's own consistency checks over the derived command, so a
    /// malformed argument definition fails in tests instead of at startup.
    #[test]
    fn cli_definition_is_valid() {
        let command = Cli::command();
        command.debug_assert();
    }
}