rsomics-bed-random 0.1.0

Generate random BED intervals — bedtools random equivalent
Documentation
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::path::Path;

use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use rsomics_common::{Result, RsomicsError};

pub fn random_bed(
    genome_path: &Path,
    n: u64,
    length: u64,
    seed: u64,
    output: &mut dyn Write,
) -> Result<()> {
    let genome = load_genome(genome_path)?;
    let chroms: Vec<(&String, &u64)> = genome.iter().collect();
    if chroms.is_empty() {
        return Err(RsomicsError::InvalidInput("empty genome file".into()));
    }
    let total_len: u64 = chroms.iter().map(|(_, l)| **l).sum();
    let mut rng = StdRng::seed_from_u64(seed);
    let mut out = BufWriter::with_capacity(64 * 1024, output);

    for _ in 0..n {
        let mut pos_in_genome = rng.gen_range(0..total_len.saturating_sub(length));
        let mut chrom_name = "";
        for (name, clen) in &chroms {
            if pos_in_genome < **clen {
                chrom_name = name;
                break;
            }
            pos_in_genome -= **clen;
        }
        let start = pos_in_genome;
        let end = start + length;
        writeln!(out, "{chrom_name}\t{start}\t{end}").map_err(RsomicsError::Io)?;
    }

    out.flush().map_err(RsomicsError::Io)?;
    Ok(())
}

fn load_genome(path: &Path) -> Result<HashMap<String, u64>> {
    let file = File::open(path)
        .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", path.display())))?;
    let reader = BufReader::new(file);
    let mut map = HashMap::new();
    for line in reader.lines() {
        let line = line.map_err(RsomicsError::Io)?;
        let fields: Vec<&str> = line.split('\t').collect();
        if fields.len() >= 2 {
            let len: u64 = fields[1]
                .parse()
                .map_err(|e| RsomicsError::InvalidInput(format!("bad length: {e}")))?;
            map.insert(fields[0].to_string(), len);
        }
    }
    Ok(map)
}