1use crate::utils::bamlift::*;
2use rust_htslib::bam;
3use rust_htslib::bam::ext::BamRecordExtensions;
4
5#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd)]
6pub struct FiberAnnotation {
7 pub start: i64,
8 pub end: i64,
9 pub length: i64,
10 pub qual: u8,
11 pub reference_start: Option<i64>,
12 pub reference_end: Option<i64>,
13 pub reference_length: Option<i64>,
14 pub extra_columns: Option<Vec<String>>,
15}
16
17#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd)]
18pub struct FiberAnnotations {
19 pub annotations: Vec<FiberAnnotation>,
20 pub seq_len: i64,
21 pub reverse: bool,
22}
23
24pub type Ranges = FiberAnnotations;
26
27impl FiberAnnotations {
28 pub fn from_annotations(
30 mut annotations: Vec<FiberAnnotation>,
31 seq_len: i64,
32 reverse: bool,
33 ) -> Self {
34 annotations.sort_by_key(|a| a.start);
36
37 Self {
38 annotations,
39 seq_len,
40 reverse,
41 }
42 }
43
44 pub fn new(
46 record: &bam::Record,
47 mut forward_starts: Vec<i64>,
48 forward_ends: Option<Vec<i64>>,
49 mut lengths: Option<Vec<i64>>,
50 ) -> Self {
51 let mut single_bp_liftover = false;
52 if forward_ends.is_none() && lengths.is_none() {
54 lengths = Some(vec![1; forward_starts.len()]);
55 single_bp_liftover = true;
56 }
57
58 let mut forward_ends_inclusive: Vec<i64> = match forward_ends {
60 Some(x) => x.into_iter().map(|x| x + 1).collect(),
61 None => forward_starts
62 .iter()
63 .zip(lengths.unwrap().iter())
64 .map(|(&x, &y)| x + y - 1)
65 .collect(),
66 };
67
68 let is_reverse = record.is_reverse();
70 let seq_len = i64::try_from(record.seq_len()).unwrap();
71
72 Self::positions_on_aligned_sequence(&mut forward_starts, is_reverse, seq_len);
74 Self::positions_on_aligned_sequence(&mut forward_ends_inclusive, is_reverse, seq_len);
75 let mut starts = forward_starts;
76 let mut ends = forward_ends_inclusive;
77
78 if record.is_reverse() {
80 std::mem::swap(&mut starts, &mut ends);
81 }
82
83 ends = ends.into_iter().map(|x| x + 1).collect();
85
86 let lengths = starts
88 .iter()
89 .zip(ends.iter())
90 .map(|(&x, &y)| Some(y - x))
91 .collect::<Vec<_>>();
92
93 let (reference_starts, reference_ends, reference_lengths) = if single_bp_liftover {
94 lift_query_range_exact(record, &starts, &starts)
95 } else {
96 lift_query_range(record, &starts, &ends)
97 }
98 .unwrap_or_else(|e| {
99 log::error!(
100 "Failed lifting over annotations in BAM record: {} aligned from {} to {}.",
101 String::from_utf8_lossy(record.qname()),
102 record.reference_start() + 1,
103 record.reference_end()
104 );
105 log::error!("Failed to lift query range: {}", e);
106 panic!("Failed to lift query range: {}", e);
107 });
108
109 let mut annotations: Vec<FiberAnnotation> = starts
111 .into_iter()
112 .zip(ends)
113 .zip(lengths)
114 .zip(reference_starts)
115 .zip(reference_ends)
116 .zip(reference_lengths)
117 .map(
118 |(((((start, end), length), ref_start), ref_end), ref_length)| FiberAnnotation {
119 start,
120 end,
121 length: length.unwrap_or(end - start),
122 qual: 0,
123 reference_start: ref_start,
124 reference_end: ref_end,
125 reference_length: ref_length,
126 extra_columns: None,
127 },
128 )
129 .collect();
130
131 annotations.sort_by_key(|a| a.start);
133
134 FiberAnnotations {
136 annotations,
137 seq_len,
138 reverse: is_reverse,
139 }
140 }
141
142 pub fn set_qual(&mut self, mut forward_qual: Vec<u8>) {
143 assert_eq!(forward_qual.len(), self.annotations.len());
144 if self.reverse {
146 forward_qual.reverse();
147 }
148
149 for (annotation, &q) in self.annotations.iter_mut().zip(forward_qual.iter()) {
150 annotation.qual = q;
151 }
152 }
153
154 pub fn starts(&self) -> Vec<i64> {
156 self.annotations.iter().map(|a| a.start).collect()
157 }
158
159 pub fn ends(&self) -> Vec<i64> {
160 self.annotations.iter().map(|a| a.end).collect()
161 }
162
163 pub fn lengths(&self) -> Vec<i64> {
164 self.annotations.iter().map(|a| a.length).collect()
165 }
166
167 pub fn option_starts(&self) -> Vec<Option<i64>> {
169 self.annotations.iter().map(|a| Some(a.start)).collect()
170 }
171 pub fn option_ends(&self) -> Vec<Option<i64>> {
172 self.annotations.iter().map(|a| Some(a.end)).collect()
173 }
174 pub fn option_lengths(&self) -> Vec<Option<i64>> {
175 self.annotations.iter().map(|a| Some(a.length)).collect()
176 }
177
178 pub fn qual(&self) -> Vec<u8> {
179 self.annotations.iter().map(|a| a.qual).collect()
180 }
181
182 pub fn reference_starts(&self) -> Vec<Option<i64>> {
183 self.annotations.iter().map(|a| a.reference_start).collect()
184 }
185
186 pub fn reference_ends(&self) -> Vec<Option<i64>> {
187 self.annotations.iter().map(|a| a.reference_end).collect()
188 }
189
190 pub fn reference_lengths(&self) -> Vec<Option<i64>> {
191 self.annotations
192 .iter()
193 .map(|a| a.reference_length)
194 .collect()
195 }
196
197 fn positions_on_aligned_sequence(input_positions: &mut [i64], is_reverse: bool, seq_len: i64) {
199 if !is_reverse {
200 return;
201 }
202 for p in input_positions.iter_mut() {
204 *p = seq_len - *p - 1;
205 }
206 input_positions.reverse();
207 }
208
209 pub fn get_molecular(&self) -> Vec<Option<(i64, i64, i64)>> {
212 self.annotations
213 .iter()
214 .map(|annotation| Some((annotation.start, annotation.end, annotation.length)))
215 .collect()
216 }
217
218 pub fn forward_starts(&self) -> Vec<i64> {
219 if !self.reverse {
220 self.starts()
222 } else {
223 let mut forward_starts: Vec<i64> = self
229 .annotations
230 .iter()
231 .map(|a| self.seq_len - a.start - 1)
232 .collect();
233
234 forward_starts.reverse();
236 forward_starts
237 }
238 }
239
240 pub fn get_forward_quals(&self) -> Vec<u8> {
241 let mut forward: Vec<u8> = self.annotations.iter().map(|a| a.qual).collect();
242 if self.reverse {
243 forward.reverse();
244 }
245 forward
246 }
247
248 pub fn filter_by_qual(&mut self, min_qual: u8) {
250 self.annotations
251 .retain(|annotation| annotation.qual >= min_qual);
252 }
253
254 pub fn filter_starts_at_read_ends(&mut self, strip: i64) {
256 if strip == 0 {
257 return;
258 }
259
260 let original_len = self.annotations.len();
261 self.annotations.retain(|annotation| {
262 annotation.start >= strip && annotation.start <= self.seq_len - strip
263 });
264
265 if self.annotations.len() != original_len {
266 log::trace!(
267 "basemods stripped, {} basemods removed",
268 original_len - self.annotations.len()
269 );
270 }
271 }
272
273 pub fn to_strings(&self, reference: bool, skip_none: bool) -> Vec<String> {
274 let (s, e, l, q) = if reference {
275 (
276 self.reference_starts(),
277 self.reference_ends(),
278 self.reference_lengths(),
279 self.qual(),
280 )
281 } else {
282 (
283 self.option_starts(),
284 self.option_ends(),
285 self.option_lengths(),
286 self.qual(),
287 )
288 };
289
290 let s = crate::join_by_str_option_can_skip(&s, ",", skip_none);
291 let e = crate::join_by_str_option_can_skip(&e, ",", skip_none);
292 let l = crate::join_by_str_option_can_skip(&l, ",", skip_none);
293 if reference {
294 vec![s, e, l]
295 } else {
296 let q = crate::join_by_str(&q, ",");
297 vec![s, e, l, q]
298 }
299 }
300
301 pub fn get_reference(&self) -> Vec<Option<(i64, i64, i64)>> {
304 self.annotations
305 .iter()
306 .map(|annotation| {
307 if let (Some(s), Some(e), Some(l)) = (
308 annotation.reference_start,
309 annotation.reference_end,
310 annotation.reference_length,
311 ) {
312 Some((s, e, l))
313 } else {
314 None
315 }
316 })
317 .collect()
318 }
319
320 pub fn merge_ranges(multiple_ranges: Vec<&Self>) -> Self {
321 if multiple_ranges.is_empty() {
322 return Self::from_annotations(vec![], 0, false);
323 }
324
325 let reverse = multiple_ranges[0].reverse;
327 let seq_len = multiple_ranges[0].seq_len;
328 for r in multiple_ranges.iter() {
329 assert_eq!(r.reverse, reverse);
330 assert_eq!(r.seq_len, seq_len);
331 }
332
333 let annotations: Vec<FiberAnnotation> = multiple_ranges
335 .iter()
336 .flat_map(|r| r.annotations.clone())
337 .collect();
338
339 Self::from_annotations(annotations, seq_len, reverse)
341 }
342
343 fn apply_offset_helper(in_start: i64, in_end: i64, offset: i64, strand: char) -> (i64, i64) {
344 let mut start = in_start - offset;
345 let mut end = in_end - offset - 1; if strand == '-' {
347 start = -start;
348 end = -end;
349
350 if start > end {
352 std::mem::swap(&mut start, &mut end);
353 }
354 }
355 end += 1;
357 (start, end)
358 }
359
360 pub fn apply_offset(&mut self, offset: i64, ref_offset: i64, strand: char) {
362 for annotation in &mut self.annotations {
363 let (new_start, new_end) =
364 Self::apply_offset_helper(annotation.start, annotation.end, offset, strand);
365 annotation.start = new_start;
366 annotation.end = new_end;
367 assert!(
368 annotation.end - annotation.start == annotation.length,
369 "Annotation length mismatch after centering: {} != {} - {}",
370 annotation.length,
371 annotation.end,
372 annotation.start
373 );
374
375 if let (Some(ref_start), Some(ref_end)) =
377 (annotation.reference_start, annotation.reference_end)
378 {
379 let (new_ref_start, new_ref_end) =
380 Self::apply_offset_helper(ref_start, ref_end, ref_offset, strand);
381 annotation.reference_start = Some(new_ref_start);
382 annotation.reference_end = Some(new_ref_end);
383 annotation.reference_length = Some(new_ref_end - new_ref_start + 1);
384 }
385 }
386 if strand == '-' {
388 self.annotations.reverse();
389 }
390 }
391
392 pub fn from_bam_tags(
394 record: &rust_htslib::bam::Record,
395 start_tag: &[u8; 2],
396 length_tag: &[u8; 2],
397 annotation_tag: Option<&[u8; 2]>,
398 ) -> anyhow::Result<Option<Self>> {
399 if let (Ok(start_aux), Ok(length_aux)) = (record.aux(start_tag), record.aux(length_tag)) {
401 if let (
402 rust_htslib::bam::record::Aux::ArrayU32(start_array),
403 rust_htslib::bam::record::Aux::ArrayU32(length_array),
404 ) = (start_aux, length_aux)
405 {
406 let start_values: Vec<u32> = start_array.iter().collect();
407 let length_values: Vec<u32> = length_array.iter().collect();
408
409 if start_values.len() != length_values.len() {
410 return Err(anyhow::anyhow!(
411 "Mismatched {} and {} array lengths",
412 String::from_utf8_lossy(start_tag),
413 String::from_utf8_lossy(length_tag)
414 ));
415 }
416
417 let forward_starts: Vec<i64> = start_values.iter().map(|&x| x as i64).collect();
419 let lengths: Vec<i64> = length_values.iter().map(|&x| x as i64).collect();
420
421 let annotation_values = if let Some(ann_tag) = annotation_tag {
423 if let Ok(rust_htslib::bam::record::Aux::String(ann_string)) =
424 record.aux(ann_tag)
425 {
426 Some(ann_string.split('|').collect::<Vec<_>>())
427 } else {
428 None
429 }
430 } else {
431 None
432 };
433
434 let mut fiber_annotations = FiberAnnotations::new(
436 record,
437 forward_starts,
438 None, Some(lengths), );
441
442 if let Some(ann_vals) = annotation_values {
444 for (i, annotation) in fiber_annotations.annotations.iter_mut().enumerate() {
445 if i < ann_vals.len() && !ann_vals[i].is_empty() {
446 annotation.extra_columns =
447 Some(ann_vals[i].split(';').map(|s| s.to_string()).collect());
448 }
449 }
450 }
451
452 Ok(Some(fiber_annotations))
453 } else {
454 Ok(None) }
456 } else {
457 Ok(None) }
459 }
460
461 pub fn overlapping_annotations(&self, range_start: i64, range_end: i64) -> Self {
463 let mut overlapping = Vec::new();
464
465 for annotation in &self.annotations {
466 if annotation.end > range_start && annotation.start < range_end {
468 overlapping.push(annotation.clone());
469 }
470 }
471
472 FiberAnnotations::from_annotations(overlapping, range_end - range_start, self.reverse)
473 }
474
475 pub fn write_to_bam_tags(
477 &self,
478 record: &mut rust_htslib::bam::Record,
479 start_tag: &[u8; 2],
480 length_tag: &[u8; 2],
481 annotation_tag: Option<&[u8; 2]>,
482 ) -> anyhow::Result<()> {
483 use anyhow::Context;
484
485 if self.annotations.is_empty() {
486 return Ok(());
487 }
488
489 let starts: Vec<u32> = self.annotations.iter().map(|a| a.start as u32).collect();
491
492 let lengths: Vec<u32> = self.annotations.iter().map(|a| a.length as u32).collect();
493
494 record
496 .push_aux(
497 start_tag,
498 rust_htslib::bam::record::Aux::ArrayU32((&starts).into()),
499 )
500 .with_context(|| format!("Failed to add {} tag", String::from_utf8_lossy(start_tag)))?;
501
502 record
504 .push_aux(
505 length_tag,
506 rust_htslib::bam::record::Aux::ArrayU32((&lengths).into()),
507 )
508 .with_context(|| {
509 format!("Failed to add {} tag", String::from_utf8_lossy(length_tag))
510 })?;
511
512 if let Some(ann_tag) = annotation_tag {
514 let extra_strings: Vec<String> = self
515 .annotations
516 .iter()
517 .map(|a| {
518 if let Some(ref extra_cols) = a.extra_columns {
519 extra_cols.join(";")
520 } else {
521 String::new()
522 }
523 })
524 .collect();
525
526 if extra_strings.iter().any(|s| !s.is_empty()) {
528 let joined_extra = extra_strings.join("|");
529 record
530 .push_aux(
531 ann_tag,
532 rust_htslib::bam::record::Aux::String(&joined_extra),
533 )
534 .with_context(|| {
535 format!("Failed to add {} tag", String::from_utf8_lossy(ann_tag))
536 })?;
537 }
538 }
539 Ok(())
540 }
541}
542
543impl<'a> IntoIterator for &'a FiberAnnotations {
544 type Item = &'a FiberAnnotation;
545 type IntoIter = FiberAnnotationsIterator<'a>;
546
547 fn into_iter(self) -> Self::IntoIter {
548 FiberAnnotationsIterator {
549 annotations: self,
550 index: 0,
551 }
552 }
553}
554
555pub struct FiberAnnotationsIterator<'a> {
556 annotations: &'a FiberAnnotations,
557 index: usize,
558}
559
560impl<'a> Iterator for FiberAnnotationsIterator<'a> {
561 type Item = &'a FiberAnnotation;
562 fn next(&mut self) -> Option<Self::Item> {
563 if self.index >= self.annotations.annotations.len() {
564 return None;
565 }
566 let annotation = &self.annotations.annotations[self.index];
567 self.index += 1;
568 Some(annotation)
569 }
570}
571
572pub type RangesIterator<'a> = FiberAnnotationsIterator<'a>;