1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
use rand::{prelude::ThreadRng, thread_rng, Rng};
use crate::{Genome, record::Bed3};

use super::Sorted;


pub struct SortedRandomInterval {
    rng: ThreadRng,
    chrom_sizes: Vec<(&'static str, usize)>,
    regions: Vec<(usize, usize)>,
    chrom_idx: usize,
    flatten_region_begin: usize,
    flatten_region_end: usize,
    length_min: usize,
    length_max: usize,
    count: usize,
}

impl Sorted for SortedRandomInterval {}

impl SortedRandomInterval {
    pub fn new(length_min: usize, length_max: usize, count: usize) -> SortedRandomInterval {
        let chrom_sizes = Genome::get_chrom_sizes();
        let mut regions = Vec::new();
        let mut flatten_region_end = 0;
        for size in chrom_sizes.iter().map(|(_, size)| (*size).max(length_min) - length_min) {
            regions.push((flatten_region_end, flatten_region_end + size));
            flatten_region_end += size;
        }
        Self {
            rng: thread_rng(),
            regions,
            chrom_sizes,
            chrom_idx: 0,
            flatten_region_begin: 0,
            flatten_region_end,
            length_min,
            length_max,
            count,
        }
    }

    pub fn generate_next_interval(&mut self) -> Option<Bed3> {
        if self.count == 0 {
            return None;
        }
        let (beg, end) = self.generate_raw_interval();
        while self.chrom_idx < self.regions.len() && self.regions[self.chrom_idx].1 < beg {
            self.chrom_idx += 1;
        }
        if self.chrom_idx >= self.regions.len() {
            return None;
        }
        let chr_beg = beg - self.regions[self.chrom_idx].0;
        let chr_end = (end - self.regions[self.chrom_idx].0).min(self.chrom_sizes[self.chrom_idx].1);
        let chr_name = Genome::query_chr(self.chrom_sizes[self.chrom_idx].0);
        self.count -= 1;
        Some(Bed3 {
                chrom: chr_name,
                start: chr_beg as u32,
                end: chr_end as u32,
        })
    }

    fn generate_raw_interval(&mut self) -> (usize, usize) {
        let begin = self.generate_next_random_point(self.count);
        let end = self.rng.gen_range(begin + self.length_min..=begin + self.length_max);
        (begin, end)
    }

    fn generate_next_random_point(&mut self, k : usize) -> usize {
        let linear_p : f64 = self.rng.gen_range(0.0..1.0);
        let adjusted = 1.0 - linear_p.powf(1.0 / k as f64);

        let mapped = self.flatten_region_begin as f64 + ((self.flatten_region_end - self.flatten_region_begin) as f64) * adjusted;

        let ret = mapped as usize;
        self.flatten_region_begin = ret;

        ret
    }
}

impl Iterator for SortedRandomInterval {
    type Item = Bed3;

    fn next(&mut self) -> Option<Self::Item> {
        self.generate_next_interval()
    }
}