Skip to main content

rsomics_bed_random/
lib.rs

1use std::collections::HashMap;
2use std::fs::File;
3use std::io::{BufRead, BufReader, BufWriter, Write};
4use std::path::Path;
5
6use rand::rngs::StdRng;
7use rand::{Rng, SeedableRng};
8use rsomics_common::{Result, RsomicsError};
9
10pub fn random_bed(
11    genome_path: &Path,
12    n: u64,
13    length: u64,
14    seed: u64,
15    output: &mut dyn Write,
16) -> Result<()> {
17    let genome = load_genome(genome_path)?;
18    let chroms: Vec<(&String, &u64)> = genome.iter().collect();
19    if chroms.is_empty() {
20        return Err(RsomicsError::InvalidInput("empty genome file".into()));
21    }
22    let total_len: u64 = chroms.iter().map(|(_, l)| **l).sum();
23    let mut rng = StdRng::seed_from_u64(seed);
24    let mut out = BufWriter::with_capacity(64 * 1024, output);
25
26    for _ in 0..n {
27        let mut pos_in_genome = rng.gen_range(0..total_len.saturating_sub(length));
28        let mut chrom_name = "";
29        for (name, clen) in &chroms {
30            if pos_in_genome < **clen {
31                chrom_name = name;
32                break;
33            }
34            pos_in_genome -= **clen;
35        }
36        let start = pos_in_genome;
37        let end = start + length;
38        writeln!(out, "{chrom_name}\t{start}\t{end}").map_err(RsomicsError::Io)?;
39    }
40
41    out.flush().map_err(RsomicsError::Io)?;
42    Ok(())
43}
44
45fn load_genome(path: &Path) -> Result<HashMap<String, u64>> {
46    let file = File::open(path)
47        .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", path.display())))?;
48    let reader = BufReader::new(file);
49    let mut map = HashMap::new();
50    for line in reader.lines() {
51        let line = line.map_err(RsomicsError::Io)?;
52        let fields: Vec<&str> = line.split('\t').collect();
53        if fields.len() >= 2 {
54            let len: u64 = fields[1]
55                .parse()
56                .map_err(|e| RsomicsError::InvalidInput(format!("bad length: {e}")))?;
57            map.insert(fields[0].to_string(), len);
58        }
59    }
60    Ok(map)
61}