rsomics_bed_random/
lib.rs1use std::collections::HashMap;
2use std::fs::File;
3use std::io::{BufRead, BufReader, BufWriter, Write};
4use std::path::Path;
5
6use rand::rngs::StdRng;
7use rand::{Rng, SeedableRng};
8use rsomics_common::{Result, RsomicsError};
9
10pub fn random_bed(
11 genome_path: &Path,
12 n: u64,
13 length: u64,
14 seed: u64,
15 output: &mut dyn Write,
16) -> Result<()> {
17 let genome = load_genome(genome_path)?;
18 let chroms: Vec<(&String, &u64)> = genome.iter().collect();
19 if chroms.is_empty() {
20 return Err(RsomicsError::InvalidInput("empty genome file".into()));
21 }
22 let total_len: u64 = chroms.iter().map(|(_, l)| **l).sum();
23 let mut rng = StdRng::seed_from_u64(seed);
24 let mut out = BufWriter::with_capacity(64 * 1024, output);
25
26 for _ in 0..n {
27 let mut pos_in_genome = rng.gen_range(0..total_len.saturating_sub(length));
28 let mut chrom_name = "";
29 for (name, clen) in &chroms {
30 if pos_in_genome < **clen {
31 chrom_name = name;
32 break;
33 }
34 pos_in_genome -= **clen;
35 }
36 let start = pos_in_genome;
37 let end = start + length;
38 writeln!(out, "{chrom_name}\t{start}\t{end}").map_err(RsomicsError::Io)?;
39 }
40
41 out.flush().map_err(RsomicsError::Io)?;
42 Ok(())
43}
44
45fn load_genome(path: &Path) -> Result<HashMap<String, u64>> {
46 let file = File::open(path)
47 .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", path.display())))?;
48 let reader = BufReader::new(file);
49 let mut map = HashMap::new();
50 for line in reader.lines() {
51 let line = line.map_err(RsomicsError::Io)?;
52 let fields: Vec<&str> = line.split('\t').collect();
53 if fields.len() >= 2 {
54 let len: u64 = fields[1]
55 .parse()
56 .map_err(|e| RsomicsError::InvalidInput(format!("bad length: {e}")))?;
57 map.insert(fields[0].to_string(), len);
58 }
59 }
60 Ok(map)
61}