1use crate::genomic::GenomicInterval;
8
9#[derive(Debug, Clone)]
15pub struct RleCoverage {
16 runs: Vec<(u32, u64)>,
18 total_length: u64,
20}
21
22impl RleCoverage {
23 pub fn from_intervals(intervals: &[GenomicInterval], chrom_length: u64) -> Self {
28 if chrom_length == 0 {
29 return Self {
30 runs: Vec::new(),
31 total_length: 0,
32 };
33 }
34
35 let mut events: Vec<(u64, i32)> = Vec::with_capacity(intervals.len() * 2);
37 for iv in intervals {
38 let start = iv.start.min(chrom_length);
39 let end = iv.end.min(chrom_length);
40 if start < end {
41 events.push((start, 1));
42 events.push((end, -1));
43 }
44 }
45 events.sort_by_key(|&(pos, delta)| (pos, std::cmp::Reverse(delta)));
46
47 let mut runs = Vec::new();
49 let mut depth: i64 = 0;
50 let mut prev_pos: u64 = 0;
51
52 for (pos, delta) in &events {
53 let pos = *pos;
54 if pos > prev_pos {
55 runs.push((depth as u32, pos - prev_pos));
56 prev_pos = pos;
57 }
58 depth += *delta as i64;
59 }
60
61 if prev_pos < chrom_length {
63 runs.push((depth as u32, chrom_length - prev_pos));
64 }
65
66 let merged = merge_runs(runs);
68
69 Self {
70 runs: merged,
71 total_length: chrom_length,
72 }
73 }
74
75 pub fn from_depths(depths: &[u32]) -> Self {
77 if depths.is_empty() {
78 return Self {
79 runs: Vec::new(),
80 total_length: 0,
81 };
82 }
83
84 let mut runs = Vec::new();
85 let mut current_depth = depths[0];
86 let mut run_len: u64 = 1;
87
88 for &d in &depths[1..] {
89 if d == current_depth {
90 run_len += 1;
91 } else {
92 runs.push((current_depth, run_len));
93 current_depth = d;
94 run_len = 1;
95 }
96 }
97 runs.push((current_depth, run_len));
98
99 Self {
100 runs,
101 total_length: depths.len() as u64,
102 }
103 }
104
105 pub fn get(&self, position: u64) -> u32 {
109 if position >= self.total_length {
110 return 0;
111 }
112
113 let mut offset: u64 = 0;
114 for &(depth, length) in &self.runs {
115 offset += length;
116 if position < offset {
117 return depth;
118 }
119 }
120
121 0
122 }
123
124 pub fn mean_coverage(&self) -> f64 {
126 if self.total_length == 0 {
127 return 0.0;
128 }
129
130 let sum: u64 = self
131 .runs
132 .iter()
133 .map(|&(depth, length)| depth as u64 * length)
134 .sum();
135
136 sum as f64 / self.total_length as f64
137 }
138
139 pub fn bases_above(&self, threshold: u32) -> u64 {
141 self.runs
142 .iter()
143 .filter(|&&(depth, _)| depth > threshold)
144 .map(|&(_, length)| length)
145 .sum()
146 }
147
148 pub fn to_vec(&self) -> Vec<u32> {
150 let mut result = Vec::with_capacity(self.total_length as usize);
151 for &(depth, length) in &self.runs {
152 result.extend(std::iter::repeat(depth).take(length as usize));
153 }
154 result
155 }
156
157 pub fn len(&self) -> u64 {
159 self.total_length
160 }
161
162 pub fn is_empty(&self) -> bool {
164 self.total_length == 0
165 }
166
167 pub fn num_runs(&self) -> usize {
169 self.runs.len()
170 }
171}
172
173fn merge_runs(runs: Vec<(u32, u64)>) -> Vec<(u32, u64)> {
174 if runs.is_empty() {
175 return runs;
176 }
177
178 let mut merged = Vec::with_capacity(runs.len());
179 let mut current = runs[0];
180
181 for &(depth, length) in &runs[1..] {
182 if depth == current.0 {
183 current.1 += length;
184 } else {
185 merged.push(current);
186 current = (depth, length);
187 }
188 }
189 merged.push(current);
190
191 merged
192}
193
194#[cfg(test)]
195mod tests {
196 use super::*;
197
198 fn iv(start: u64, end: u64) -> GenomicInterval {
199 GenomicInterval::new("chr1", start, end).unwrap()
200 }
201
202 #[test]
203 fn empty_coverage() {
204 let cov = RleCoverage::from_intervals(&[], 100);
205 assert_eq!(cov.len(), 100);
206 assert_eq!(cov.mean_coverage(), 0.0);
207 assert_eq!(cov.bases_above(0), 0);
208 assert_eq!(cov.get(50), 0);
209 }
210
211 #[test]
212 fn zero_length() {
213 let cov = RleCoverage::from_intervals(&[], 0);
214 assert!(cov.is_empty());
215 assert_eq!(cov.mean_coverage(), 0.0);
216 }
217
218 #[test]
219 fn single_interval() {
220 let cov = RleCoverage::from_intervals(&[iv(10, 20)], 100);
221 assert_eq!(cov.get(5), 0);
222 assert_eq!(cov.get(10), 1);
223 assert_eq!(cov.get(15), 1);
224 assert_eq!(cov.get(19), 1);
225 assert_eq!(cov.get(20), 0);
226 assert_eq!(cov.get(50), 0);
227 assert_eq!(cov.bases_above(0), 10);
228 assert!((cov.mean_coverage() - 0.1).abs() < 1e-10);
229 }
230
231 #[test]
232 fn overlapping_intervals() {
233 let cov = RleCoverage::from_intervals(&[iv(10, 30), iv(20, 40)], 100);
234 assert_eq!(cov.get(15), 1); assert_eq!(cov.get(25), 2); assert_eq!(cov.get(35), 1); assert_eq!(cov.get(45), 0); }
239
240 #[test]
241 fn from_depths_basic() {
242 let depths = vec![0, 0, 1, 1, 1, 2, 2, 0];
243 let cov = RleCoverage::from_depths(&depths);
244 assert_eq!(cov.len(), 8);
245 assert_eq!(cov.num_runs(), 4);
246 assert_eq!(cov.get(0), 0);
247 assert_eq!(cov.get(2), 1);
248 assert_eq!(cov.get(5), 2);
249 assert_eq!(cov.get(7), 0);
250 }
251
252 #[test]
253 fn from_depths_empty() {
254 let cov = RleCoverage::from_depths(&[]);
255 assert!(cov.is_empty());
256 }
257
258 #[test]
259 fn round_trip() {
260 let depths = vec![0, 0, 3, 3, 3, 1, 0, 0, 0, 5];
261 let cov = RleCoverage::from_depths(&depths);
262 let expanded = cov.to_vec();
263 assert_eq!(expanded, depths);
264 }
265
266 #[test]
267 fn mean_coverage_value() {
268 let cov = RleCoverage::from_depths(&[0, 1, 2, 3, 4]);
269 assert!((cov.mean_coverage() - 2.0).abs() < 1e-10);
270 }
271
272 #[test]
273 fn bases_above_threshold() {
274 let cov = RleCoverage::from_depths(&[0, 1, 2, 3, 4, 5]);
275 assert_eq!(cov.bases_above(2), 3); assert_eq!(cov.bases_above(0), 5); assert_eq!(cov.bases_above(10), 0);
278 }
279
280 #[test]
281 fn get_out_of_range() {
282 let cov = RleCoverage::from_depths(&[1, 2, 3]);
283 assert_eq!(cov.get(3), 0);
284 assert_eq!(cov.get(100), 0);
285 }
286
287 #[test]
288 fn interval_coverage_to_vec() {
289 let cov = RleCoverage::from_intervals(&[iv(2, 5)], 8);
290 let expanded = cov.to_vec();
291 assert_eq!(expanded, vec![0, 0, 1, 1, 1, 0, 0, 0]);
292 }
293
294 #[test]
295 fn heavily_overlapping() {
296 let intervals: Vec<GenomicInterval> = (0..5).map(|_| iv(0, 10)).collect();
298 let cov = RleCoverage::from_intervals(&intervals, 10);
299 assert_eq!(cov.get(0), 5);
300 assert_eq!(cov.get(9), 5);
301 assert_eq!(cov.num_runs(), 1); }
303
304 #[test]
305 fn uniform_depth() {
306 let depths = vec![3; 1000];
307 let cov = RleCoverage::from_depths(&depths);
308 assert_eq!(cov.num_runs(), 1);
309 assert!((cov.mean_coverage() - 3.0).abs() < 1e-10);
310 }
311
312 #[test]
313 fn intervals_beyond_chrom_length() {
314 let cov = RleCoverage::from_intervals(&[iv(5, 200)], 10);
316 assert_eq!(cov.len(), 10);
317 assert_eq!(cov.get(5), 1);
318 assert_eq!(cov.get(9), 1);
319 let expanded = cov.to_vec();
320 assert_eq!(expanded.len(), 10);
321 }
322}