1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
use rand::{prelude::ThreadRng, thread_rng, Rng};
use crate::{Genome, record::Bed3};
use super::Sorted;
pub struct SortedRandomInterval {
rng: ThreadRng,
chrom_sizes: Vec<(&'static str, usize)>,
regions: Vec<(usize, usize)>,
chrom_idx: usize,
flatten_region_begin: usize,
flatten_region_end: usize,
length_min: usize,
length_max: usize,
count: usize,
}
impl Sorted for SortedRandomInterval {}
impl SortedRandomInterval {
pub fn new(length_min: usize, length_max: usize, count: usize) -> SortedRandomInterval {
let chrom_sizes = Genome::get_chrom_sizes();
let mut regions = Vec::new();
let mut flatten_region_end = 0;
for size in chrom_sizes.iter().map(|(_, size)| (*size).max(length_min) - length_min) {
regions.push((flatten_region_end, flatten_region_end + size));
flatten_region_end += size;
}
Self {
rng: thread_rng(),
regions,
chrom_sizes,
chrom_idx: 0,
flatten_region_begin: 0,
flatten_region_end,
length_min,
length_max,
count,
}
}
pub fn generate_next_interval(&mut self) -> Option<Bed3> {
if self.count == 0 {
return None;
}
let (beg, end) = self.generate_raw_interval();
while self.chrom_idx < self.regions.len() && self.regions[self.chrom_idx].1 < beg {
self.chrom_idx += 1;
}
if self.chrom_idx >= self.regions.len() {
return None;
}
let chr_beg = beg - self.regions[self.chrom_idx].0;
let chr_end = (end - self.regions[self.chrom_idx].0).min(self.chrom_sizes[self.chrom_idx].1);
let chr_name = Genome::query_chr(self.chrom_sizes[self.chrom_idx].0);
self.count -= 1;
Some(Bed3 {
chrom: chr_name,
start: chr_beg as u32,
end: chr_end as u32,
})
}
fn generate_raw_interval(&mut self) -> (usize, usize) {
let begin = self.generate_next_random_point(self.count);
let end = self.rng.gen_range(begin + self.length_min..=begin + self.length_max);
(begin, end)
}
fn generate_next_random_point(&mut self, k : usize) -> usize {
let linear_p : f64 = self.rng.gen_range(0.0..1.0);
let adjusted = 1.0 - linear_p.powf(1.0 / k as f64);
let mapped = self.flatten_region_begin as f64 + ((self.flatten_region_end - self.flatten_region_begin) as f64) * adjusted;
let ret = mapped as usize;
self.flatten_region_begin = ret;
ret
}
}
impl Iterator for SortedRandomInterval {
type Item = Bed3;
fn next(&mut self) -> Option<Self::Item> {
self.generate_next_interval()
}
}