grass_runtime/algorithm/
random.rs

1use crate::{record::Bed3, Genome};
2use rand::{prelude::ThreadRng, thread_rng, Rng};
3
4use super::Sorted;
5
6pub struct SortedRandomInterval {
7    rng: ThreadRng,
8    chrom_sizes: Vec<(&'static str, usize)>,
9    regions: Vec<(usize, usize)>,
10    chrom_idx: usize,
11    flatten_region_begin: usize,
12    flatten_region_end: usize,
13    length_min: usize,
14    length_max: usize,
15    count: usize,
16}
17
18impl Sorted for SortedRandomInterval {}
19
20impl SortedRandomInterval {
21    pub fn new(length_min: usize, length_max: usize, count: usize) -> SortedRandomInterval {
22        let chrom_sizes = Genome::get_chrom_sizes();
23        let mut regions = Vec::new();
24        let mut flatten_region_end = 0;
25        for size in chrom_sizes
26            .iter()
27            .map(|(_, size)| (*size).max(length_min) - length_min)
28        {
29            regions.push((flatten_region_end, flatten_region_end + size));
30            flatten_region_end += size;
31        }
32        Self {
33            rng: thread_rng(),
34            regions,
35            chrom_sizes,
36            chrom_idx: 0,
37            flatten_region_begin: 0,
38            flatten_region_end,
39            length_min,
40            length_max,
41            count,
42        }
43    }
44
45    pub fn generate_next_interval(&mut self) -> Option<Bed3> {
46        if self.count == 0 {
47            return None;
48        }
49        let (beg, end) = self.generate_raw_interval();
50        while self.chrom_idx < self.regions.len() && self.regions[self.chrom_idx].1 < beg {
51            self.chrom_idx += 1;
52        }
53        if self.chrom_idx >= self.regions.len() {
54            return None;
55        }
56        let chr_beg = beg - self.regions[self.chrom_idx].0;
57        let chr_end =
58            (end - self.regions[self.chrom_idx].0).min(self.chrom_sizes[self.chrom_idx].1);
59        let chr_name = Genome::query_chr(self.chrom_sizes[self.chrom_idx].0);
60        self.count -= 1;
61        Some(Bed3 {
62            chrom: chr_name,
63            start: chr_beg as u32,
64            end: chr_end as u32,
65        })
66    }
67
68    fn generate_raw_interval(&mut self) -> (usize, usize) {
69        let begin = self.generate_next_random_point(self.count);
70        let end = self
71            .rng
72            .gen_range(begin + self.length_min..=begin + self.length_max);
73        (begin, end)
74    }
75
76    fn generate_next_random_point(&mut self, k: usize) -> usize {
77        let linear_p: f64 = self.rng.gen_range(0.0..1.0);
78        let adjusted = 1.0 - linear_p.powf(1.0 / k as f64);
79
80        let mapped = self.flatten_region_begin as f64
81            + ((self.flatten_region_end - self.flatten_region_begin) as f64) * adjusted;
82
83        let ret = mapped as usize;
84        self.flatten_region_begin = ret;
85
86        ret
87    }
88}
89
90impl Iterator for SortedRandomInterval {
91    type Item = Bed3;
92
93    fn next(&mut self) -> Option<Self::Item> {
94        self.generate_next_interval()
95    }
96}