1use crate::utils::bamlift::*;
2use itertools::{izip, multiunzip};
3use rust_htslib::bam;
4
5#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd)]
6pub struct Ranges {
7 pub starts: Vec<Option<i64>>,
8 pub ends: Vec<Option<i64>>,
9 pub lengths: Vec<Option<i64>>,
10 pub qual: Vec<u8>,
11 pub reference_starts: Vec<Option<i64>>,
12 pub reference_ends: Vec<Option<i64>>,
13 pub reference_lengths: Vec<Option<i64>>,
14 pub seq_len: i64,
15 pub reverse: bool,
16}
17
18impl Ranges {
19 pub fn new(
21 record: &bam::Record,
22 mut forward_starts: Vec<i64>,
23 forward_ends: Option<Vec<i64>>,
24 mut lengths: Option<Vec<i64>>,
25 ) -> Self {
26 let mut single_bp_liftover = false;
27 if forward_ends.is_none() && lengths.is_none() {
29 lengths = Some(vec![1; forward_starts.len()]);
30 single_bp_liftover = true;
31 }
32
33 let mut forward_ends_inclusive: Vec<i64> = match forward_ends {
35 Some(x) => x.into_iter().map(|x| x + 1).collect(),
36 None => forward_starts
37 .iter()
38 .zip(lengths.unwrap().iter())
39 .map(|(&x, &y)| x + y - 1)
40 .collect(),
41 };
42
43 let is_reverse = record.is_reverse();
45 let seq_len = i64::try_from(record.seq_len()).unwrap();
46
47 Self::positions_on_aligned_sequence(&mut forward_starts, is_reverse, seq_len);
49 Self::positions_on_aligned_sequence(&mut forward_ends_inclusive, is_reverse, seq_len);
50 let mut starts = forward_starts;
51 let mut ends = forward_ends_inclusive;
52
53 if record.is_reverse() {
55 std::mem::swap(&mut starts, &mut ends);
56 }
57
58 ends = ends.into_iter().map(|x| x + 1).collect();
60
61 let lengths = starts
63 .iter()
64 .zip(ends.iter())
65 .map(|(&x, &y)| Some(y - x))
66 .collect::<Vec<_>>();
67
68 let (reference_starts, reference_ends, reference_lengths) = if single_bp_liftover {
69 lift_query_range_exact(record, &starts, &starts)
70 } else {
71 lift_query_range(record, &starts, &ends)
72 };
73
74 Ranges {
76 starts: starts.into_iter().map(Some).collect(),
77 ends: ends.into_iter().map(Some).collect(),
78 lengths,
79 qual: vec![0; reference_starts.len()],
80 reference_starts,
81 reference_ends,
82 reference_lengths,
83 seq_len,
84 reverse: is_reverse,
85 }
86 }
87
88 pub fn set_qual(&mut self, qual: Vec<u8>) {
89 assert_eq!(qual.len(), self.starts.len());
90 self.qual = qual;
91 if self.reverse {
92 self.qual.reverse();
93 }
94 }
95
96 fn positions_on_aligned_sequence(input_positions: &mut [i64], is_reverse: bool, seq_len: i64) {
98 if !is_reverse {
99 return;
100 }
101 for p in input_positions.iter_mut() {
103 *p = seq_len - *p - 1;
104 }
105 input_positions.reverse();
106 }
107
108 pub fn get_molecular(&self) -> Vec<Option<(i64, i64, i64)>> {
111 self.starts
112 .iter()
113 .zip(self.ends.iter())
114 .zip(self.lengths.iter())
115 .map(|((s, e), l)| {
116 if let (Some(s), Some(e), Some(l)) = (s, e, l) {
117 Some((*s, *e, *l))
118 } else {
119 None
120 }
121 })
122 .collect()
123 }
124
125 pub fn get_starts(&self) -> Vec<i64> {
126 self.starts.clone().into_iter().flatten().collect()
127 }
128
129 pub fn get_ends(&self) -> Vec<i64> {
130 self.ends.clone().into_iter().flatten().collect()
131 }
132
133 pub fn get_forward_starts(&self) -> Vec<i64> {
134 let mut z = self.get_starts();
135 Self::positions_on_aligned_sequence(&mut z, self.reverse, self.seq_len);
136 z
137 }
138
139 pub fn get_forward_quals(&self) -> Vec<u8> {
140 let mut forward = self.qual.clone();
141 if self.reverse {
142 forward.reverse();
143 }
144 forward
145 }
146
147 pub fn filter_by_qual(&mut self, min_qual: u8) {
149 let to_keep = self
150 .qual
151 .iter()
152 .enumerate()
153 .filter_map(|(i, &q)| if q >= min_qual { Some(i) } else { None })
154 .collect::<Vec<_>>();
155
156 self.starts = to_keep.iter().map(|&i| self.starts[i]).collect();
157 self.ends = to_keep.iter().map(|&i| self.ends[i]).collect();
158 self.lengths = to_keep.iter().map(|&i| self.lengths[i]).collect();
159 self.qual = to_keep.iter().map(|&i| self.qual[i]).collect();
160 self.reference_starts = to_keep.iter().map(|&i| self.reference_starts[i]).collect();
161 self.reference_ends = to_keep.iter().map(|&i| self.reference_ends[i]).collect();
162 self.reference_lengths = to_keep.iter().map(|&i| self.reference_lengths[i]).collect();
163 }
164
165 pub fn filter_starts_at_read_ends(&mut self, strip: i64) {
167 if strip == 0 {
168 return;
169 }
170 let to_keep = self
171 .starts
172 .iter()
173 .enumerate()
174 .filter_map(|(i, &s)| {
175 if let Some(s) = s {
176 if s < strip || s > self.seq_len - strip {
177 None
178 } else {
179 Some(i)
180 }
181 } else {
182 None
183 }
184 })
185 .collect::<Vec<_>>();
186
187 if to_keep.len() != self.starts.len() {
188 log::trace!(
189 "basemods stripped, {} basemods removed",
190 self.starts.len() - to_keep.len()
191 );
192 }
193
194 self.starts = to_keep.iter().map(|&i| self.starts[i]).collect();
195 self.ends = to_keep.iter().map(|&i| self.ends[i]).collect();
196 self.lengths = to_keep.iter().map(|&i| self.lengths[i]).collect();
197 self.qual = to_keep.iter().map(|&i| self.qual[i]).collect();
198 self.reference_starts = to_keep.iter().map(|&i| self.reference_starts[i]).collect();
199 self.reference_ends = to_keep.iter().map(|&i| self.reference_ends[i]).collect();
200 self.reference_lengths = to_keep.iter().map(|&i| self.reference_lengths[i]).collect();
201 }
202
203 pub fn to_strings(&self, reference: bool, skip_none: bool) -> Vec<String> {
204 let (s, e, l, q) = if reference {
205 (
206 &self.reference_starts,
207 &self.reference_ends,
208 &self.reference_lengths,
209 &self.qual,
210 )
211 } else {
212 (&self.starts, &self.ends, &self.lengths, &self.qual)
213 };
214
215 let s = crate::join_by_str_option_can_skip(s, ",", skip_none);
216 let e = crate::join_by_str_option_can_skip(e, ",", skip_none);
217 let l = crate::join_by_str_option_can_skip(l, ",", skip_none);
218 if reference {
219 vec![s, e, l]
220 } else {
221 let q = crate::join_by_str(q, ",");
222 vec![s, e, l, q]
223 }
224 }
225
226 pub fn get_reference(&self) -> Vec<Option<(i64, i64, i64)>> {
229 self.reference_starts
230 .iter()
231 .zip(self.reference_ends.iter())
232 .zip(self.reference_lengths.iter())
233 .map(|((s, e), l)| {
234 if let (Some(s), Some(e), Some(l)) = (s, e, l) {
235 Some((*s, *e, *l))
236 } else {
237 None
238 }
239 })
240 .collect()
241 }
242
243 pub fn merge_ranges(multiple_ranges: Vec<&Self>) -> Self {
244 if multiple_ranges.is_empty() {
245 return Self {
246 starts: vec![],
247 ends: vec![],
248 lengths: vec![],
249 qual: vec![],
250 reference_starts: vec![],
251 reference_ends: vec![],
252 reference_lengths: vec![],
253 seq_len: 0,
254 reverse: false,
255 };
256 }
257 let reverse = multiple_ranges[0].reverse;
259 let seq_len = multiple_ranges[0].seq_len;
260 for r in multiple_ranges.iter() {
261 assert_eq!(r.reverse, reverse);
262 assert_eq!(r.seq_len, seq_len);
263 }
264 let starts = multiple_ranges.iter().flat_map(|r| r.starts.clone());
266 let ends = multiple_ranges.iter().flat_map(|r| r.ends.clone());
267 let lengths = multiple_ranges.iter().flat_map(|r| r.lengths.clone());
268 let qual = multiple_ranges.iter().flat_map(|r| r.qual.clone());
269 let reference_starts = multiple_ranges
270 .iter()
271 .flat_map(|r| r.reference_starts.clone());
272 let reference_ends = multiple_ranges
273 .iter()
274 .flat_map(|r| r.reference_ends.clone());
275 let reference_lengths = multiple_ranges
276 .iter()
277 .flat_map(|r| r.reference_lengths.clone());
278
279 #[allow(clippy::type_complexity)]
280 let mut combo: Vec<(
281 Option<i64>,
282 Option<i64>,
283 Option<i64>,
284 u8,
285 Option<i64>,
286 Option<i64>,
287 Option<i64>,
288 )> = izip!(
289 starts,
290 ends,
291 lengths,
292 qual,
293 reference_starts,
294 reference_ends,
295 reference_lengths
296 )
297 .collect();
298 combo.sort_by_key(|(s, _e, _l, _q, _r_s, _r_e, _r_l)| *s);
300 let (starts, ends, lengths, qual, reference_starts, reference_ends, reference_lengths) =
302 multiunzip(combo);
303
304 Self {
305 starts,
306 ends,
307 lengths,
308 qual,
309 reference_starts,
310 reference_ends,
311 reference_lengths,
312 seq_len,
313 reverse,
314 }
315 }
316}
317
318impl<'a> IntoIterator for &'a Ranges {
319 type Item = (i64, i64, i64, u8, Option<(i64, i64, i64)>);
320 type IntoIter = RangesIterator<'a>;
321
322 fn into_iter(self) -> Self::IntoIter {
323 RangesIterator {
324 row: self,
325 index: 0,
326 }
327 }
328}
329
330pub struct RangesIterator<'a> {
331 row: &'a Ranges,
332 index: usize,
333}
334
335impl<'a> Iterator for RangesIterator<'a> {
336 type Item = (i64, i64, i64, u8, Option<(i64, i64, i64)>);
337 fn next(&mut self) -> Option<Self::Item> {
338 if self.index >= self.row.starts.len() {
339 return None;
340 }
341 let start = self.row.starts[self.index]?;
342 let end = self.row.ends[self.index]?;
343 let length = self.row.lengths[self.index]?;
344 let qual = self.row.qual[self.index];
345 let reference_start = self.row.reference_starts[self.index];
346 let reference_end = self.row.reference_ends[self.index];
347 let reference_length = self.row.reference_lengths[self.index];
348 let reference = match (reference_start, reference_end, reference_length) {
349 (Some(s), Some(e), Some(l)) => Some((s, e, l)),
350 _ => None,
351 };
352 self.index += 1;
353 Some((start, end, length, qual, reference))
354 }
355}