grass_runtime/algorithm/
random.rs1use crate::{record::Bed3, Genome};
2use rand::{prelude::ThreadRng, thread_rng, Rng};
3
4use super::Sorted;
5
6pub struct SortedRandomInterval {
7 rng: ThreadRng,
8 chrom_sizes: Vec<(&'static str, usize)>,
9 regions: Vec<(usize, usize)>,
10 chrom_idx: usize,
11 flatten_region_begin: usize,
12 flatten_region_end: usize,
13 length_min: usize,
14 length_max: usize,
15 count: usize,
16}
17
18impl Sorted for SortedRandomInterval {}
19
20impl SortedRandomInterval {
21 pub fn new(length_min: usize, length_max: usize, count: usize) -> SortedRandomInterval {
22 let chrom_sizes = Genome::get_chrom_sizes();
23 let mut regions = Vec::new();
24 let mut flatten_region_end = 0;
25 for size in chrom_sizes
26 .iter()
27 .map(|(_, size)| (*size).max(length_min) - length_min)
28 {
29 regions.push((flatten_region_end, flatten_region_end + size));
30 flatten_region_end += size;
31 }
32 Self {
33 rng: thread_rng(),
34 regions,
35 chrom_sizes,
36 chrom_idx: 0,
37 flatten_region_begin: 0,
38 flatten_region_end,
39 length_min,
40 length_max,
41 count,
42 }
43 }
44
45 pub fn generate_next_interval(&mut self) -> Option<Bed3> {
46 if self.count == 0 {
47 return None;
48 }
49 let (beg, end) = self.generate_raw_interval();
50 while self.chrom_idx < self.regions.len() && self.regions[self.chrom_idx].1 < beg {
51 self.chrom_idx += 1;
52 }
53 if self.chrom_idx >= self.regions.len() {
54 return None;
55 }
56 let chr_beg = beg - self.regions[self.chrom_idx].0;
57 let chr_end =
58 (end - self.regions[self.chrom_idx].0).min(self.chrom_sizes[self.chrom_idx].1);
59 let chr_name = Genome::query_chr(self.chrom_sizes[self.chrom_idx].0);
60 self.count -= 1;
61 Some(Bed3 {
62 chrom: chr_name,
63 start: chr_beg as u32,
64 end: chr_end as u32,
65 })
66 }
67
68 fn generate_raw_interval(&mut self) -> (usize, usize) {
69 let begin = self.generate_next_random_point(self.count);
70 let end = self
71 .rng
72 .gen_range(begin + self.length_min..=begin + self.length_max);
73 (begin, end)
74 }
75
76 fn generate_next_random_point(&mut self, k: usize) -> usize {
77 let linear_p: f64 = self.rng.gen_range(0.0..1.0);
78 let adjusted = 1.0 - linear_p.powf(1.0 / k as f64);
79
80 let mapped = self.flatten_region_begin as f64
81 + ((self.flatten_region_end - self.flatten_region_begin) as f64) * adjusted;
82
83 let ret = mapped as usize;
84 self.flatten_region_begin = ret;
85
86 ret
87 }
88}
89
90impl Iterator for SortedRandomInterval {
91 type Item = Bed3;
92
93 fn next(&mut self) -> Option<Self::Item> {
94 self.generate_next_interval()
95 }
96}