1use crate::position::Position;
3use crate::read_filter::ReadFilter;
4use itertools::Itertools;
5use rust_htslib::bam::{
6 self, HeaderView,
7 pileup::{Alignment, Pileup},
8 record::Record,
9};
10use serde::Serialize;
11use smartstring::{LazyCompact, SmartString, alias::String};
12use std::default;
13
14use super::mate_fix::{Base, MateResolutionStrategy};
15
16#[derive(Debug, Serialize, Default)]
20#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
21pub struct PileupPosition {
22 #[serde(rename = "REF")]
24 pub ref_seq: String,
25 pub pos: u32,
27 #[serde(skip_serializing_if = "Option::is_none")]
29 pub ref_base: Option<char>,
30 pub depth: u32,
32 pub a: u32,
34 pub c: u32,
36 pub g: u32,
38 pub t: u32,
40 pub n: u32,
42 pub r: u32,
44 pub y: u32,
46 pub s: u32,
48 pub w: u32,
50 pub k: u32,
52 pub m: u32,
54 pub ins: u32,
57 pub del: u32,
59 pub ref_skip: u32,
61 pub fail: u32,
63 pub count_of_mate_resoutions: u32,
65 pub near_max_depth: bool,
67}
68
69impl Position for PileupPosition {
70 fn new(ref_seq: String, pos: u32) -> Self {
72 PileupPosition {
73 ref_seq,
74 pos,
75 ..default::Default::default()
76 }
77 }
78}
79
80impl PileupPosition {
81 #[inline(always)]
83 fn update<F: ReadFilter>(
84 &mut self,
85 alignment: &Alignment,
86 record: &Record,
87 read_filter: &F,
88 base_filter: Option<u8>,
89 recommended_base: Option<Base>,
90 mates_resolved: bool,
91 ) {
92 if !read_filter.filter_read(record, Some(alignment)) {
93 self.depth -= 1;
94 self.fail += 1;
95 return;
96 }
97 if alignment.is_refskip() {
100 self.ref_skip += 1;
101 self.depth -= 1;
102 } else if alignment.is_del() {
103 self.del += 1;
104 } else {
105 if let Some(base_qual_filter) = base_filter
110 && (record.seq().is_empty()
111 || record.qual()[alignment.qpos().unwrap()] < base_qual_filter)
112 {
113 self.n += 1
114 } else if let Some(b) = recommended_base {
115 match b {
116 Base::A => self.a += 1,
117 Base::C => self.c += 1,
118 Base::T => self.t += 1,
119 Base::G => self.g += 1,
120 Base::R => self.r += 1,
121 Base::Y => self.y += 1,
122 Base::S => self.s += 1,
123 Base::W => self.w += 1,
124 Base::K => self.k += 1,
125 Base::M => self.m += 1,
126 _ => self.n += 1,
127 }
128 } else if record.seq().is_empty() {
129 self.n += 1
130 } else {
131 match (record.seq()[alignment.qpos().unwrap()] as char).to_ascii_uppercase() {
132 'A' => self.a += 1,
133 'C' => self.c += 1,
134 'T' | 'U' => self.t += 1,
135 'G' => self.g += 1,
136 _ => self.n += 1,
137 }
138 }
139 if let bam::pileup::Indel::Ins(_len) = alignment.indel() {
141 self.ins += 1;
142 }
143 }
144
145 if mates_resolved {
146 self.count_of_mate_resoutions += 1;
147 }
148 }
149
150 #[inline]
162 pub fn from_pileup<F: ReadFilter>(
163 pileup: Pileup,
164 header: &bam::HeaderView,
165 read_filter: &F,
166 base_filter: Option<u8>,
167 ) -> Self {
168 let name = Self::compact_refseq(header, pileup.tid());
169 let mut pos = Self::new(name, pileup.pos());
171 pos.depth = pileup.depth();
172
173 for alignment in pileup.alignments() {
174 let record = alignment.record();
175 Self::update(
176 &mut pos,
177 &alignment,
178 &record,
179 read_filter,
180 base_filter,
181 None,
182 false,
183 );
184 }
185 pos
186 }
187
188 #[inline]
205 pub fn from_pileup_mate_aware<F: ReadFilter>(
206 pileup: Pileup,
207 header: &bam::HeaderView,
208 read_filter: &F,
209 base_filter: Option<u8>,
210 mate_fix_strat: MateResolutionStrategy,
211 ) -> Self {
212 let name = Self::compact_refseq(header, pileup.tid());
213 let mut pos = Self::new(name, pileup.pos());
215 pos.depth = pileup.depth();
216
217 let grouped_by_qname = pileup
219 .alignments()
220 .map(|aln| {
221 let record = aln.record();
222 (aln, record)
223 })
224 .sorted_by(|a, b| Ord::cmp(a.1.qname(), b.1.qname()))
225 .chunk_by(|a| a.1.qname().to_owned());
227
228 for (_qname, reads) in grouped_by_qname.into_iter() {
229 let mut total_reads = 0; let mut reads = reads
232 .inspect(|_| total_reads += 1)
233 .sorted_by(|a, b| mate_fix_strat.cmp(a, b, read_filter).ordering.reverse());
234 let best = &reads.next().unwrap();
235
236 if let Some(second_best) = &reads.next().as_ref() {
237 let result = mate_fix_strat.cmp(best, second_best, read_filter);
239 pos.depth -= total_reads - 1;
240 Self::update(
241 &mut pos,
242 &best.0,
243 &best.1,
244 read_filter,
245 base_filter,
246 result.recommended_base,
247 true,
248 );
249 } else {
250 pos.depth -= total_reads - 1;
251 Self::update(
252 &mut pos,
253 &best.0,
254 &best.1,
255 read_filter,
256 base_filter,
257 None,
258 false,
259 );
260 }
261 }
262 pos
263 }
264
265 #[inline]
267 pub fn compact_refseq(header: &HeaderView, tid: u32) -> SmartString<LazyCompact> {
268 let name = std::str::from_utf8(header.tid2name(tid)).unwrap();
269 String::from(name)
270 }
271}
272
273#[cfg(test)]
274mod tests {
275 use super::*;
276 use crate::read_filter::DefaultReadFilter;
277 use rust_htslib::bam::{self, Read, record::Record};
278 use std::cmp::Ordering;
279
280 #[test]
283 fn test_from_pileup_mate_aware_chooses_first_mate() {
284 use rust_htslib::bam::{IndexedReader, Writer, index};
285 use tempfile::tempdir;
286
287 let tempdir = tempdir().unwrap();
288 let bam_path = tempdir.path().join("test.bam");
289
290 let mut header = bam::header::Header::new();
292 let mut chr1 = bam::header::HeaderRecord::new(b"SQ");
293 chr1.push_tag(b"SN", &"chr1".to_owned());
294 chr1.push_tag(b"LN", &"100".to_owned());
295 header.push_record(&chr1);
296 let view = bam::HeaderView::from_header(&header);
297
298 let records = vec![
303 Record::from_sam(
304 &view,
305 b"TESTPAIR\t67\tchr1\t10\t40\t10M\tchr1\t15\t30\tAAAAAAAAAA\t##########",
306 )
307 .unwrap(),
308 Record::from_sam(
309 &view,
310 b"TESTPAIR\t147\tchr1\t15\t40\t10M\tchr1\t10\t30\tCCCCCCCCCC\t##########",
311 )
312 .unwrap(),
313 ];
314
315 let mut writer = Writer::from_path(&bam_path, &header, bam::Format::Bam).unwrap();
317 for record in &records {
318 writer.write(record).unwrap();
319 }
320 drop(writer);
321
322 index::build(&bam_path, None, index::Type::Bai, 1).unwrap();
324
325 let mut reader = IndexedReader::from_path(&bam_path).unwrap();
327 let header_view = reader.header().clone();
328
329 reader.fetch(("chr1", 14, 15)).unwrap();
331 let pileup_iter = reader.pileup();
332
333 let read_filter = DefaultReadFilter::new(0, 0, 0);
334
335 let mut found_position = false;
337 for pileup_result in pileup_iter {
338 let pileup = pileup_result.unwrap();
339 if pileup.pos() == 14 {
340 found_position = true;
342
343 let position = PileupPosition::from_pileup_mate_aware(
345 pileup,
346 &header_view,
347 &read_filter,
348 None,
349 MateResolutionStrategy::Original,
350 );
351
352 assert_eq!(position.depth, 1, "Depth should be 1 after mate selection");
355 assert_eq!(position.a, 1, "Should count first mate's A base");
356 assert_eq!(position.c, 0, "Should not count second mate's C base");
357 break;
358 }
359 }
360
361 assert!(found_position, "Should have found pileup at position 14");
362 }
363
364 #[test]
366 fn test_mate_resolution_strategies() {
367 use rust_htslib::bam::{IndexedReader, Writer, index};
368 use tempfile::tempdir;
369
370 let tempdir = tempdir().unwrap();
371 let bam_path = tempdir.path().join("test.bam");
372
373 let mut header = bam::header::Header::new();
375 let mut chr1 = bam::header::HeaderRecord::new(b"SQ");
376 chr1.push_tag(b"SN", &"chr1".to_owned());
377 chr1.push_tag(b"LN", &"100".to_owned());
378 header.push_record(&chr1);
379 let view = bam::HeaderView::from_header(&header);
380
381 let records = vec![
384 Record::from_sam(
385 &view,
386 b"TESTPAIR\t67\tchr1\t10\t40\t10M\tchr1\t15\t30\tAAAAAAAAGA\t#########=",
387 )
388 .unwrap(),
389 Record::from_sam(
390 &view,
391 b"TESTPAIR\t147\tchr1\t15\t40\t10M\tchr1\t10\t30\tGGGGGGGGGG\t((((((((((",
392 )
393 .unwrap(),
394 ];
395
396 let mut writer = Writer::from_path(&bam_path, &header, bam::Format::Bam).unwrap();
398 for record in &records {
399 writer.write(record).unwrap();
400 }
401 drop(writer);
402
403 index::build(&bam_path, None, index::Type::Bai, 1).unwrap();
405
406 let mut reader = IndexedReader::from_path(&bam_path).unwrap();
408 let _header_view = reader.header().clone();
409
410 reader.fetch(("chr1", 19, 20)).unwrap();
412 let pileup_iter = reader.pileup();
413
414 let read_filter = DefaultReadFilter::new(0, 0, 0);
415
416 for pileup_result in pileup_iter {
417 let pileup = pileup_result.unwrap();
418 if pileup.pos() == 19 {
419 let strat = crate::position::mate_fix::MateResolutionStrategy::BaseQualMapQualIUPAC;
421
422 let alns: Vec<_> = pileup
424 .alignments()
425 .map(|aln| {
426 let rec = aln.record();
427 (aln, rec)
428 })
429 .collect();
430
431 if alns.len() == 2 {
432 let result = strat.cmp(&alns[0], &alns[1], &read_filter);
434 assert_eq!(
435 result.ordering,
436 Ordering::Less,
437 "Second mate should win due to higher base quality"
438 );
439 }
440 break;
441 }
442 }
443 }
444
445 #[test]
447 fn test_iupac_base_resolution() {
448 use rust_htslib::bam::{IndexedReader, Writer, index};
449 use tempfile::tempdir;
450
451 let tempdir = tempdir().unwrap();
452 let bam_path = tempdir.path().join("test.bam");
453
454 let mut header = bam::header::Header::new();
456 let mut chr1 = bam::header::HeaderRecord::new(b"SQ");
457 chr1.push_tag(b"SN", &"chr1".to_owned());
458 chr1.push_tag(b"LN", &"100".to_owned());
459 header.push_record(&chr1);
460 let view = bam::HeaderView::from_header(&header);
461
462 let records = vec![
465 Record::from_sam(
466 &view,
467 b"TESTPAIR\t67\tchr1\t10\t40\t10M\tchr1\t15\t30\tAAAAAAAAAA\t>>>>>>>>>>",
468 )
469 .unwrap(),
470 Record::from_sam(
471 &view,
472 b"TESTPAIR\t147\tchr1\t15\t40\t10M\tchr1\t10\t30\tGGGGGGGGGG\t>>>>>>>>>>",
473 )
474 .unwrap(),
475 ];
476
477 let mut writer = Writer::from_path(&bam_path, &header, bam::Format::Bam).unwrap();
479 for record in &records {
480 writer.write(record).unwrap();
481 }
482 drop(writer);
483
484 index::build(&bam_path, None, index::Type::Bai, 1).unwrap();
486
487 let mut reader = IndexedReader::from_path(&bam_path).unwrap();
489 let _header_view = reader.header().clone();
490
491 reader.fetch(("chr1", 19, 20)).unwrap();
492 let pileup_iter = reader.pileup();
493
494 let read_filter = DefaultReadFilter::new(0, 0, 0);
495
496 for pileup_result in pileup_iter {
497 let pileup = pileup_result.unwrap();
498 if pileup.pos() == 19 {
499 let strat = crate::position::mate_fix::MateResolutionStrategy::BaseQualMapQualIUPAC;
500
501 let alns: Vec<_> = pileup
502 .alignments()
503 .map(|aln| {
504 let rec = aln.record();
505 (aln, rec)
506 })
507 .collect();
508
509 if alns.len() == 2 {
510 let result = strat.cmp(&alns[0], &alns[1], &read_filter);
512 assert!(matches!(
514 result.recommended_base,
515 Some(crate::position::mate_fix::Base::R)
516 ));
517 }
518 break;
519 }
520 }
521 }
522
523 #[test]
527 fn test_empty_seq_pileup() {
528 use rust_htslib::bam::{IndexedReader, Writer, index};
529 use tempfile::tempdir;
530
531 let tempdir = tempdir().unwrap();
532 let bam_path = tempdir.path().join("empty_seq.bam");
533
534 let mut header = bam::header::Header::new();
536 let mut chr1 = bam::header::HeaderRecord::new(b"SQ");
537 chr1.push_tag(b"SN", &"chr1".to_owned());
538 chr1.push_tag(b"LN", &"100".to_owned());
539 header.push_record(&chr1);
540 let view = bam::HeaderView::from_header(&header);
541
542 let normal_record = Record::from_sam(
545 &view,
546 b"NORMAL\t0\tchr1\t1\t40\t25M\t*\t0\t0\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################",
547 )
548 .unwrap();
549
550 let empty_seq_record =
552 Record::from_sam(&view, b"EMPTY_SEQ\t0\tchr1\t10\t40\t25M\t*\t0\t0\t*\t*").unwrap();
553
554 let normal_record2 = Record::from_sam(
556 &view,
557 b"NORMAL2\t0\tchr1\t15\t40\t25M\t*\t0\t0\tGGGGGGGGGGGGGGGGGGGGGGGGG\t#########################",
558 )
559 .unwrap();
560
561 let mut writer = Writer::from_path(&bam_path, &header, bam::Format::Bam).unwrap();
563 writer.write(&normal_record).unwrap();
564 writer.write(&empty_seq_record).unwrap();
565 writer.write(&normal_record2).unwrap();
566 drop(writer);
567
568 index::build(&bam_path, None, index::Type::Bai, 1).unwrap();
570
571 let mut reader = IndexedReader::from_path(&bam_path).unwrap();
573 let header_view = reader.header().clone();
574
575 let read_filter = DefaultReadFilter::new(0, 0, 0);
576
577 reader.fetch(("chr1", 14, 15)).unwrap();
580 let pileup_iter = reader.pileup();
581
582 for pileup_result in pileup_iter {
583 let pileup = pileup_result.unwrap();
584 if pileup.pos() == 14 {
585 let position =
586 PileupPosition::from_pileup(pileup, &header_view, &read_filter, None);
587
588 assert_eq!(
590 position.depth, 3,
591 "Depth should be 3 (NORMAL + EMPTY_SEQ + NORMAL2)"
592 );
593
594 assert_eq!(position.a, 1, "Should have 1 A from NORMAL read");
596
597 assert_eq!(position.g, 1, "Should have 1 G from NORMAL2 read");
599
600 assert_eq!(position.n, 1, "Empty SEQ read should be counted as N");
602
603 break;
604 }
605 }
606 }
607
608 #[test]
611 fn test_empty_seq_with_base_qual_filter() {
612 use rust_htslib::bam::{IndexedReader, Writer, index};
613 use tempfile::tempdir;
614
615 let tempdir = tempdir().unwrap();
616 let bam_path = tempdir.path().join("empty_seq_bq.bam");
617
618 let mut header = bam::header::Header::new();
620 let mut chr1 = bam::header::HeaderRecord::new(b"SQ");
621 chr1.push_tag(b"SN", &"chr1".to_owned());
622 chr1.push_tag(b"LN", &"100".to_owned());
623 header.push_record(&chr1);
624 let view = bam::HeaderView::from_header(&header);
625
626 let normal_record = Record::from_sam(
628 &view,
629 b"NORMAL\t0\tchr1\t1\t40\t25M\t*\t0\t0\tAAAAAAAAAAAAAAAAAAAAAAAAA\tIIIIIIIIIIIIIIIIIIIIIIIII",
630 )
631 .unwrap();
632
633 let empty_seq_record =
635 Record::from_sam(&view, b"EMPTY_SEQ\t0\tchr1\t1\t40\t25M\t*\t0\t0\t*\t*").unwrap();
636
637 let mut writer = Writer::from_path(&bam_path, &header, bam::Format::Bam).unwrap();
639 writer.write(&normal_record).unwrap();
640 writer.write(&empty_seq_record).unwrap();
641 drop(writer);
642
643 index::build(&bam_path, None, index::Type::Bai, 1).unwrap();
645
646 let mut reader = IndexedReader::from_path(&bam_path).unwrap();
648 let header_view = reader.header().clone();
649
650 let read_filter = DefaultReadFilter::new(0, 0, 0);
651
652 reader.fetch(("chr1", 0, 1)).unwrap();
653 let pileup_iter = reader.pileup();
654
655 for pileup_result in pileup_iter {
656 let pileup = pileup_result.unwrap();
657 if pileup.pos() == 0 {
658 let position =
660 PileupPosition::from_pileup(pileup, &header_view, &read_filter, Some(20));
661
662 assert_eq!(position.depth, 2, "Depth should be 2");
664
665 assert_eq!(
667 position.a, 1,
668 "Should have 1 A from high-quality NORMAL read"
669 );
670
671 assert_eq!(position.n, 1, "Empty SEQ read should be counted as N");
673
674 break;
675 }
676 }
677 }
678
679 #[test]
681 fn test_empty_seq_mate_aware() {
682 use rust_htslib::bam::{IndexedReader, Writer, index};
683 use tempfile::tempdir;
684
685 let tempdir = tempdir().unwrap();
686 let bam_path = tempdir.path().join("empty_seq_mate.bam");
687
688 let mut header = bam::header::Header::new();
690 let mut chr1 = bam::header::HeaderRecord::new(b"SQ");
691 chr1.push_tag(b"SN", &"chr1".to_owned());
692 chr1.push_tag(b"LN", &"100".to_owned());
693 header.push_record(&chr1);
694 let view = bam::HeaderView::from_header(&header);
695
696 let normal_record = Record::from_sam(
698 &view,
699 b"NORMAL\t0\tchr1\t1\t40\t25M\t*\t0\t0\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################",
700 )
701 .unwrap();
702
703 let empty_seq_record =
705 Record::from_sam(&view, b"EMPTY_SEQ\t0\tchr1\t1\t40\t25M\t*\t0\t0\t*\t*").unwrap();
706
707 let mut writer = Writer::from_path(&bam_path, &header, bam::Format::Bam).unwrap();
709 writer.write(&normal_record).unwrap();
710 writer.write(&empty_seq_record).unwrap();
711 drop(writer);
712
713 index::build(&bam_path, None, index::Type::Bai, 1).unwrap();
715
716 let mut reader = IndexedReader::from_path(&bam_path).unwrap();
718 let header_view = reader.header().clone();
719
720 let read_filter = DefaultReadFilter::new(0, 0, 0);
721
722 reader.fetch(("chr1", 0, 1)).unwrap();
723 let pileup_iter = reader.pileup();
724
725 for pileup_result in pileup_iter {
726 let pileup = pileup_result.unwrap();
727 if pileup.pos() == 0 {
728 let position = PileupPosition::from_pileup_mate_aware(
729 pileup,
730 &header_view,
731 &read_filter,
732 None,
733 MateResolutionStrategy::Original,
734 );
735
736 assert_eq!(position.depth, 2, "Depth should be 2");
738
739 assert_eq!(position.a, 1, "Should have 1 A from NORMAL read");
741
742 assert_eq!(position.n, 1, "Empty SEQ read should be counted as N");
744
745 break;
746 }
747 }
748 }
749
750 #[test]
752 fn test_n_strategy() {
753 use rust_htslib::bam::{IndexedReader, Writer, index};
754 use tempfile::tempdir;
755
756 let tempdir = tempdir().unwrap();
757 let bam_path = tempdir.path().join("test.bam");
758
759 let mut header = bam::header::Header::new();
761 let mut chr1 = bam::header::HeaderRecord::new(b"SQ");
762 chr1.push_tag(b"SN", &"chr1".to_owned());
763 chr1.push_tag(b"LN", &"100".to_owned());
764 header.push_record(&chr1);
765 let view = bam::HeaderView::from_header(&header);
766
767 let records = vec![
769 Record::from_sam(
770 &view,
771 b"TESTPAIR\t67\tchr1\t10\t40\t10M\tchr1\t15\t30\tCCCCCCCCCC\t>>>>>>>>>>",
772 )
773 .unwrap(),
774 Record::from_sam(
775 &view,
776 b"TESTPAIR\t147\tchr1\t15\t40\t10M\tchr1\t10\t30\tTTTTTTTTTT\t>>>>>>>>>>",
777 )
778 .unwrap(),
779 ];
780
781 let mut writer = Writer::from_path(&bam_path, &header, bam::Format::Bam).unwrap();
783 for record in &records {
784 writer.write(record).unwrap();
785 }
786 drop(writer);
787
788 index::build(&bam_path, None, index::Type::Bai, 1).unwrap();
790
791 let mut reader = IndexedReader::from_path(&bam_path).unwrap();
793 let _header_view = reader.header().clone();
794
795 reader.fetch(("chr1", 19, 20)).unwrap();
796 let pileup_iter = reader.pileup();
797
798 let read_filter = DefaultReadFilter::new(0, 0, 0);
799
800 for pileup_result in pileup_iter {
801 let pileup = pileup_result.unwrap();
802 if pileup.pos() == 19 {
803 let strat = crate::position::mate_fix::MateResolutionStrategy::N;
804
805 let alns: Vec<_> = pileup
806 .alignments()
807 .map(|aln| {
808 let rec = aln.record();
809 (aln, rec)
810 })
811 .collect();
812
813 if alns.len() == 2 {
814 let result = strat.cmp(&alns[0], &alns[1], &read_filter);
816 assert!(matches!(
817 result.recommended_base,
818 Some(crate::position::mate_fix::Base::N)
819 ));
820 }
821 break;
822 }
823 }
824 }
825}