perbase_lib/position/
pileup_position.rs

1//! An implementation of `Position` for dealing with pileups.
2use crate::position::Position;
3use crate::read_filter::ReadFilter;
4use itertools::Itertools;
5use rust_htslib::bam::{
6    self, HeaderView,
7    pileup::{Alignment, Pileup},
8    record::Record,
9};
10use serde::Serialize;
11use smartstring::{LazyCompact, SmartString, alias::String};
12use std::default;
13
14use super::mate_fix::{Base, MateResolutionStrategy};
15
16/// Hold all information about a position.
17// NB: The max depth that htslib will return is i32::MAX, and the type of pos for htlib is u32
18// There is no reason to go bigger, for now at least
19#[derive(Debug, Serialize, Default)]
20#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
21pub struct PileupPosition {
22    /// Reference sequence name.
23    #[serde(rename = "REF")]
24    pub ref_seq: String,
25    /// 1-based position in the sequence.
26    pub pos: u32,
27    /// The reference base at this position.
28    #[serde(skip_serializing_if = "Option::is_none")]
29    pub ref_base: Option<char>,
30    /// Total depth at this position.
31    pub depth: u32,
32    /// Number of A bases at this position.
33    pub a: u32,
34    /// Number of C bases at this position.
35    pub c: u32,
36    /// Number of G bases at this position.
37    pub g: u32,
38    /// Number of T bases at this position.
39    pub t: u32,
40    /// Number of N bases at this position. Any unrecognized base will be counted as an N.
41    pub n: u32,
42    /// Number of bases that could be A or G
43    pub r: u32,
44    /// Number of bases that could be C or T
45    pub y: u32,
46    /// Number of bases that could be G or C
47    pub s: u32,
48    /// Number of bases that could be A or T
49    pub w: u32,
50    /// Number of bases that could be G or T
51    pub k: u32,
52    /// Number of bases that could be A or C
53    pub m: u32,
54    /// Number of insertions that start to the right of this position.
55    /// Does not count toward depth.
56    pub ins: u32,
57    /// Number of deletions at this position.
58    pub del: u32,
59    /// Number of refskips at this position. Does not count toward depth.
60    pub ref_skip: u32,
61    /// Number of reads failing filters at this position.
62    pub fail: u32,
63    /// Number of times a mate resolution was needed.
64    pub count_of_mate_resoutions: u32,
65    /// Depth is within 1% of max_depth
66    pub near_max_depth: bool,
67}
68
69impl Position for PileupPosition {
70    /// Create a new position for the given ref_seq name.
71    fn new(ref_seq: String, pos: u32) -> Self {
72        PileupPosition {
73            ref_seq,
74            pos,
75            ..default::Default::default()
76        }
77    }
78}
79
80impl PileupPosition {
81    /// Given a record, update the counts at this position
82    #[inline(always)]
83    fn update<F: ReadFilter>(
84        &mut self,
85        alignment: &Alignment,
86        record: &Record,
87        read_filter: &F,
88        base_filter: Option<u8>,
89        recommended_base: Option<Base>,
90        mates_resolved: bool,
91    ) {
92        if !read_filter.filter_read(record, Some(alignment)) {
93            self.depth -= 1;
94            self.fail += 1;
95            return;
96        }
97        // NB: Order matters here, a refskip is true for both is_del and is_refskip
98        // while a true del is only true for is_del
99        if alignment.is_refskip() {
100            self.ref_skip += 1;
101            self.depth -= 1;
102        } else if alignment.is_del() {
103            self.del += 1;
104        } else {
105            // We have an actual base!
106
107            // Check if we are checking the base quality score
108            // && Check if the base quality score is greater or equal to than the cutoff
109            if let Some(base_qual_filter) = base_filter
110                && (record.seq().is_empty()
111                    || record.qual()[alignment.qpos().unwrap()] < base_qual_filter)
112            {
113                self.n += 1
114            } else if let Some(b) = recommended_base {
115                match b {
116                    Base::A => self.a += 1,
117                    Base::C => self.c += 1,
118                    Base::T => self.t += 1,
119                    Base::G => self.g += 1,
120                    Base::R => self.r += 1,
121                    Base::Y => self.y += 1,
122                    Base::S => self.s += 1,
123                    Base::W => self.w += 1,
124                    Base::K => self.k += 1,
125                    Base::M => self.m += 1,
126                    _ => self.n += 1,
127                }
128            } else if record.seq().is_empty() {
129                self.n += 1
130            } else {
131                match (record.seq()[alignment.qpos().unwrap()] as char).to_ascii_uppercase() {
132                    'A' => self.a += 1,
133                    'C' => self.c += 1,
134                    'T' | 'U' => self.t += 1,
135                    'G' => self.g += 1,
136                    _ => self.n += 1,
137                }
138            }
139            // Check for insertions
140            if let bam::pileup::Indel::Ins(_len) = alignment.indel() {
141                self.ins += 1;
142            }
143        }
144
145        if mates_resolved {
146            self.count_of_mate_resoutions += 1;
147        }
148    }
149
150    /// Convert a pileup into a `Position`.
151    ///
152    /// This will walk over each of the alignments and count the number each nucleotide it finds.
153    /// It will also count the number of Ins/Dels/Skips that are at each position.
154    ///
155    /// # Arguments
156    ///
157    /// * `pileup` - a pileup at a genomic position
158    /// * `header` - a headerview for the bam file being read, to get the sequence name
159    /// * `read_filter` - a function to filter out reads, returning false will cause a read to be filtered
160    /// * `base_filter` - an optional base quality score. If Some(number) if the base quality is not >= that number the base is treated as an `N`
161    #[inline]
162    pub fn from_pileup<F: ReadFilter>(
163        pileup: Pileup,
164        header: &bam::HeaderView,
165        read_filter: &F,
166        base_filter: Option<u8>,
167    ) -> Self {
168        let name = Self::compact_refseq(header, pileup.tid());
169        // make output 1-based
170        let mut pos = Self::new(name, pileup.pos());
171        pos.depth = pileup.depth();
172
173        for alignment in pileup.alignments() {
174            let record = alignment.record();
175            Self::update(
176                &mut pos,
177                &alignment,
178                &record,
179                read_filter,
180                base_filter,
181                None,
182                false,
183            );
184        }
185        pos
186    }
187
188    /// Convert a pileup into a `Position`.
189    ///
190    /// This will walk over each of the alignments and count the number each nucleotide it finds.
191    /// It will also count the number of Ins/Dels/Skips that are at each position.
192    ///
193    /// Additionally, this method is mate aware. Before processing a position it will scan the alignments for mates.
194    /// If a mate is found, it will try to take use the mate that has the highest MAPQ, breaking ties by choosing the
195    /// first in pair that passes filters. In the event of both failing filters or not being first in pair, the first
196    /// read encountered is kept.
197    ///
198    /// # Arguments
199    ///
200    /// * `pileup` - a pileup at a genomic position
201    /// * `header` - a headerview for the bam file being read, to get the sequence name
202    /// * `read_filter` - a function to filter out reads, returning false will cause a read to be filtered
203    /// * `base_filter` - an optional base quality score. If Some(number) if the base quality is not >= that number the base is treated as an `N`
204    #[inline]
205    pub fn from_pileup_mate_aware<F: ReadFilter>(
206        pileup: Pileup,
207        header: &bam::HeaderView,
208        read_filter: &F,
209        base_filter: Option<u8>,
210        mate_fix_strat: MateResolutionStrategy,
211    ) -> Self {
212        let name = Self::compact_refseq(header, pileup.tid());
213        // make output 1-based
214        let mut pos = Self::new(name, pileup.pos());
215        pos.depth = pileup.depth();
216
217        // Group records by qname
218        let grouped_by_qname = pileup
219            .alignments()
220            .map(|aln| {
221                let record = aln.record();
222                (aln, record)
223            })
224            .sorted_by(|a, b| Ord::cmp(a.1.qname(), b.1.qname()))
225            // TODO: I'm not sure there is a good way to remove this allocation
226            .chunk_by(|a| a.1.qname().to_owned());
227
228        for (_qname, reads) in grouped_by_qname.into_iter() {
229            let mut total_reads = 0; // count how many reads there were
230
231            let mut reads = reads
232                .inspect(|_| total_reads += 1)
233                .sorted_by(|a, b| mate_fix_strat.cmp(a, b, read_filter).ordering.reverse());
234            let best = &reads.next().unwrap();
235
236            if let Some(second_best) = &reads.next().as_ref() {
237                // Deal explicitly with mate overlap, they are already ordered correctly
238                let result = mate_fix_strat.cmp(best, second_best, read_filter);
239                pos.depth -= total_reads - 1;
240                Self::update(
241                    &mut pos,
242                    &best.0,
243                    &best.1,
244                    read_filter,
245                    base_filter,
246                    result.recommended_base,
247                    true,
248                );
249            } else {
250                pos.depth -= total_reads - 1;
251                Self::update(
252                    &mut pos,
253                    &best.0,
254                    &best.1,
255                    read_filter,
256                    base_filter,
257                    None,
258                    false,
259                );
260            }
261        }
262        pos
263    }
264
265    /// Convert a tid to a [`SmartString<LazyCompact>`].
266    #[inline]
267    pub fn compact_refseq(header: &HeaderView, tid: u32) -> SmartString<LazyCompact> {
268        let name = std::str::from_utf8(header.tid2name(tid)).unwrap();
269        String::from(name)
270    }
271}
272
273#[cfg(test)]
274mod tests {
275    use super::*;
276    use crate::read_filter::DefaultReadFilter;
277    use rust_htslib::bam::{self, Read, record::Record};
278    use std::cmp::Ordering;
279
280    /// Test that from_pileup_mate_aware chooses first mate when MAPQ scores are equal
281    /// This verifies the fix for issue #82 by testing the actual function with overlapping mates
282    #[test]
283    fn test_from_pileup_mate_aware_chooses_first_mate() {
284        use rust_htslib::bam::{IndexedReader, Writer, index};
285        use tempfile::tempdir;
286
287        let tempdir = tempdir().unwrap();
288        let bam_path = tempdir.path().join("test.bam");
289
290        // Create header
291        let mut header = bam::header::Header::new();
292        let mut chr1 = bam::header::HeaderRecord::new(b"SQ");
293        chr1.push_tag(b"SN", &"chr1".to_owned());
294        chr1.push_tag(b"LN", &"100".to_owned());
295        header.push_record(&chr1);
296        let view = bam::HeaderView::from_header(&header);
297
298        // Create overlapping mate pair with equal MAPQ where they disagree on a base
299        // Both mates overlap at position 15 (1-based)
300        // First mate has 'A' at position 15, second mate has 'C' at position 15
301        // Both have MAPQ 40, so tie-breaker should choose first mate
302        let records = vec![
303            Record::from_sam(
304                &view,
305                b"TESTPAIR\t67\tchr1\t10\t40\t10M\tchr1\t15\t30\tAAAAAAAAAA\t##########",
306            )
307            .unwrap(),
308            Record::from_sam(
309                &view,
310                b"TESTPAIR\t147\tchr1\t15\t40\t10M\tchr1\t10\t30\tCCCCCCCCCC\t##########",
311            )
312            .unwrap(),
313        ];
314
315        // Write BAM file
316        let mut writer = Writer::from_path(&bam_path, &header, bam::Format::Bam).unwrap();
317        for record in &records {
318            writer.write(record).unwrap();
319        }
320        drop(writer);
321
322        // Index the BAM file
323        index::build(&bam_path, None, index::Type::Bai, 1).unwrap();
324
325        // Read back and create pileup at the overlapping position (15, 1-based = 14, 0-based)
326        let mut reader = IndexedReader::from_path(&bam_path).unwrap();
327        let header_view = reader.header().clone();
328
329        // Set up pileup at position 14 (0-based) where both mates overlap
330        reader.fetch(("chr1", 14, 15)).unwrap();
331        let pileup_iter = reader.pileup();
332
333        let read_filter = DefaultReadFilter::new(0, 0, 0);
334
335        // Find the pileup at position 14
336        let mut found_position = false;
337        for pileup_result in pileup_iter {
338            let pileup = pileup_result.unwrap();
339            if pileup.pos() == 14 {
340                // 0-based position 14 = 1-based position 15
341                found_position = true;
342
343                // Test mate-aware processing
344                let position = PileupPosition::from_pileup_mate_aware(
345                    pileup,
346                    &header_view,
347                    &read_filter,
348                    None,
349                    MateResolutionStrategy::Original,
350                );
351
352                // Should choose first mate's 'A' over second mate's 'C'
353                // Depth should be 1 (one mate chosen from the overlapping pair)
354                assert_eq!(position.depth, 1, "Depth should be 1 after mate selection");
355                assert_eq!(position.a, 1, "Should count first mate's A base");
356                assert_eq!(position.c, 0, "Should not count second mate's C base");
357                break;
358            }
359        }
360
361        assert!(found_position, "Should have found pileup at position 14");
362    }
363
364    /// Test different MateResolutionStrategy behaviors
365    #[test]
366    fn test_mate_resolution_strategies() {
367        use rust_htslib::bam::{IndexedReader, Writer, index};
368        use tempfile::tempdir;
369
370        let tempdir = tempdir().unwrap();
371        let bam_path = tempdir.path().join("test.bam");
372
373        // Create header
374        let mut header = bam::header::Header::new();
375        let mut chr1 = bam::header::HeaderRecord::new(b"SQ");
376        chr1.push_tag(b"SN", &"chr1".to_owned());
377        chr1.push_tag(b"LN", &"100".to_owned());
378        header.push_record(&chr1);
379        let view = bam::HeaderView::from_header(&header);
380
381        // Create overlapping mate pair with different base qualities and bases
382        // Position 14 (0-based): First mate has 'A' with quality 30, second mate has 'G' with quality 40
383        let records = vec![
384            Record::from_sam(
385                &view,
386                b"TESTPAIR\t67\tchr1\t10\t40\t10M\tchr1\t15\t30\tAAAAAAAAGA\t#########=",
387            )
388            .unwrap(),
389            Record::from_sam(
390                &view,
391                b"TESTPAIR\t147\tchr1\t15\t40\t10M\tchr1\t10\t30\tGGGGGGGGGG\t((((((((((",
392            )
393            .unwrap(),
394        ];
395
396        // Write BAM file
397        let mut writer = Writer::from_path(&bam_path, &header, bam::Format::Bam).unwrap();
398        for record in &records {
399            writer.write(record).unwrap();
400        }
401        drop(writer);
402
403        // Index the BAM file
404        index::build(&bam_path, None, index::Type::Bai, 1).unwrap();
405
406        // Read back and test different strategies
407        let mut reader = IndexedReader::from_path(&bam_path).unwrap();
408        let _header_view = reader.header().clone();
409
410        // Test position 19 (0-based) where mates overlap
411        reader.fetch(("chr1", 19, 20)).unwrap();
412        let pileup_iter = reader.pileup();
413
414        let read_filter = DefaultReadFilter::new(0, 0, 0);
415
416        for pileup_result in pileup_iter {
417            let pileup = pileup_result.unwrap();
418            if pileup.pos() == 19 {
419                // Test with different strategies
420                let strat = crate::position::mate_fix::MateResolutionStrategy::BaseQualMapQualIUPAC;
421
422                // Get alignments
423                let alns: Vec<_> = pileup
424                    .alignments()
425                    .map(|aln| {
426                        let rec = aln.record();
427                        (aln, rec)
428                    })
429                    .collect();
430
431                if alns.len() == 2 {
432                    // Test that higher base quality wins (second mate has quality 40)
433                    let result = strat.cmp(&alns[0], &alns[1], &read_filter);
434                    assert_eq!(
435                        result.ordering,
436                        Ordering::Less,
437                        "Second mate should win due to higher base quality"
438                    );
439                }
440                break;
441            }
442        }
443    }
444
445    /// Test IUPAC base resolution
446    #[test]
447    fn test_iupac_base_resolution() {
448        use rust_htslib::bam::{IndexedReader, Writer, index};
449        use tempfile::tempdir;
450
451        let tempdir = tempdir().unwrap();
452        let bam_path = tempdir.path().join("test.bam");
453
454        // Create header
455        let mut header = bam::header::Header::new();
456        let mut chr1 = bam::header::HeaderRecord::new(b"SQ");
457        chr1.push_tag(b"SN", &"chr1".to_owned());
458        chr1.push_tag(b"LN", &"100".to_owned());
459        header.push_record(&chr1);
460        let view = bam::HeaderView::from_header(&header);
461
462        // Create overlapping mates with same quality but different bases
463        // Both have MAPQ 40 and base quality 30
464        let records = vec![
465            Record::from_sam(
466                &view,
467                b"TESTPAIR\t67\tchr1\t10\t40\t10M\tchr1\t15\t30\tAAAAAAAAAA\t>>>>>>>>>>",
468            )
469            .unwrap(),
470            Record::from_sam(
471                &view,
472                b"TESTPAIR\t147\tchr1\t15\t40\t10M\tchr1\t10\t30\tGGGGGGGGGG\t>>>>>>>>>>",
473            )
474            .unwrap(),
475        ];
476
477        // Write BAM file
478        let mut writer = Writer::from_path(&bam_path, &header, bam::Format::Bam).unwrap();
479        for record in &records {
480            writer.write(record).unwrap();
481        }
482        drop(writer);
483
484        // Index the BAM file
485        index::build(&bam_path, None, index::Type::Bai, 1).unwrap();
486
487        // Read back and test IUPAC strategy
488        let mut reader = IndexedReader::from_path(&bam_path).unwrap();
489        let _header_view = reader.header().clone();
490
491        reader.fetch(("chr1", 19, 20)).unwrap();
492        let pileup_iter = reader.pileup();
493
494        let read_filter = DefaultReadFilter::new(0, 0, 0);
495
496        for pileup_result in pileup_iter {
497            let pileup = pileup_result.unwrap();
498            if pileup.pos() == 19 {
499                let strat = crate::position::mate_fix::MateResolutionStrategy::BaseQualMapQualIUPAC;
500
501                let alns: Vec<_> = pileup
502                    .alignments()
503                    .map(|aln| {
504                        let rec = aln.record();
505                        (aln, rec)
506                    })
507                    .collect();
508
509                if alns.len() == 2 {
510                    // With equal qualities, should return IUPAC code
511                    let result = strat.cmp(&alns[0], &alns[1], &read_filter);
512                    // A + G should give R
513                    assert!(matches!(
514                        result.recommended_base,
515                        Some(crate::position::mate_fix::Base::R)
516                    ));
517                }
518                break;
519            }
520        }
521    }
522
523    /// Test that reads with empty SEQ (represented as `*` in SAM format) are handled correctly
524    /// in the pileup code path used by base-depth. Empty SEQ reads should still count toward
525    /// depth, but their bases should be counted as N. This tests the fix for issue #91.
526    #[test]
527    fn test_empty_seq_pileup() {
528        use rust_htslib::bam::{IndexedReader, Writer, index};
529        use tempfile::tempdir;
530
531        let tempdir = tempdir().unwrap();
532        let bam_path = tempdir.path().join("empty_seq.bam");
533
534        // Create header
535        let mut header = bam::header::Header::new();
536        let mut chr1 = bam::header::HeaderRecord::new(b"SQ");
537        chr1.push_tag(b"SN", &"chr1".to_owned());
538        chr1.push_tag(b"LN", &"100".to_owned());
539        header.push_record(&chr1);
540        let view = bam::HeaderView::from_header(&header);
541
542        // Create records including one with empty SEQ
543        // Normal read with 'A' bases at positions 1-25
544        let normal_record = Record::from_sam(
545            &view,
546            b"NORMAL\t0\tchr1\t1\t40\t25M\t*\t0\t0\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################",
547        )
548        .unwrap();
549
550        // Read with empty SEQ at positions 10-35 - should count toward depth and N count
551        let empty_seq_record =
552            Record::from_sam(&view, b"EMPTY_SEQ\t0\tchr1\t10\t40\t25M\t*\t0\t0\t*\t*").unwrap();
553
554        // Another normal read with 'G' bases at positions 15-40
555        let normal_record2 = Record::from_sam(
556            &view,
557            b"NORMAL2\t0\tchr1\t15\t40\t25M\t*\t0\t0\tGGGGGGGGGGGGGGGGGGGGGGGGG\t#########################",
558        )
559        .unwrap();
560
561        // Write BAM file
562        let mut writer = Writer::from_path(&bam_path, &header, bam::Format::Bam).unwrap();
563        writer.write(&normal_record).unwrap();
564        writer.write(&empty_seq_record).unwrap();
565        writer.write(&normal_record2).unwrap();
566        drop(writer);
567
568        // Index the BAM file
569        index::build(&bam_path, None, index::Type::Bai, 1).unwrap();
570
571        // Read back and create pileup
572        let mut reader = IndexedReader::from_path(&bam_path).unwrap();
573        let header_view = reader.header().clone();
574
575        let read_filter = DefaultReadFilter::new(0, 0, 0);
576
577        // Test at position 14 (0-based) = position 15 (1-based)
578        // At this position: NORMAL (A), EMPTY_SEQ (*), NORMAL2 (G) = depth 3
579        reader.fetch(("chr1", 14, 15)).unwrap();
580        let pileup_iter = reader.pileup();
581
582        for pileup_result in pileup_iter {
583            let pileup = pileup_result.unwrap();
584            if pileup.pos() == 14 {
585                let position =
586                    PileupPosition::from_pileup(pileup, &header_view, &read_filter, None);
587
588                // Depth should include all 3 reads
589                assert_eq!(
590                    position.depth, 3,
591                    "Depth should be 3 (NORMAL + EMPTY_SEQ + NORMAL2)"
592                );
593
594                // 'A' count from NORMAL read
595                assert_eq!(position.a, 1, "Should have 1 A from NORMAL read");
596
597                // 'G' count from NORMAL2 read
598                assert_eq!(position.g, 1, "Should have 1 G from NORMAL2 read");
599
600                // 'N' count from EMPTY_SEQ read (empty seq should be counted as N)
601                assert_eq!(position.n, 1, "Empty SEQ read should be counted as N");
602
603                break;
604            }
605        }
606    }
607
608    /// Test that reads with empty SEQ work correctly with base quality filtering.
609    /// When a base quality filter is set and SEQ is empty, the read should be counted as N.
610    #[test]
611    fn test_empty_seq_with_base_qual_filter() {
612        use rust_htslib::bam::{IndexedReader, Writer, index};
613        use tempfile::tempdir;
614
615        let tempdir = tempdir().unwrap();
616        let bam_path = tempdir.path().join("empty_seq_bq.bam");
617
618        // Create header
619        let mut header = bam::header::Header::new();
620        let mut chr1 = bam::header::HeaderRecord::new(b"SQ");
621        chr1.push_tag(b"SN", &"chr1".to_owned());
622        chr1.push_tag(b"LN", &"100".to_owned());
623        header.push_record(&chr1);
624        let view = bam::HeaderView::from_header(&header);
625
626        // Normal read with high quality
627        let normal_record = Record::from_sam(
628            &view,
629            b"NORMAL\t0\tchr1\t1\t40\t25M\t*\t0\t0\tAAAAAAAAAAAAAAAAAAAAAAAAA\tIIIIIIIIIIIIIIIIIIIIIIIII",
630        )
631        .unwrap();
632
633        // Read with empty SEQ
634        let empty_seq_record =
635            Record::from_sam(&view, b"EMPTY_SEQ\t0\tchr1\t1\t40\t25M\t*\t0\t0\t*\t*").unwrap();
636
637        // Write BAM file
638        let mut writer = Writer::from_path(&bam_path, &header, bam::Format::Bam).unwrap();
639        writer.write(&normal_record).unwrap();
640        writer.write(&empty_seq_record).unwrap();
641        drop(writer);
642
643        // Index the BAM file
644        index::build(&bam_path, None, index::Type::Bai, 1).unwrap();
645
646        // Read back and create pileup with base quality filter
647        let mut reader = IndexedReader::from_path(&bam_path).unwrap();
648        let header_view = reader.header().clone();
649
650        let read_filter = DefaultReadFilter::new(0, 0, 0);
651
652        reader.fetch(("chr1", 0, 1)).unwrap();
653        let pileup_iter = reader.pileup();
654
655        for pileup_result in pileup_iter {
656            let pileup = pileup_result.unwrap();
657            if pileup.pos() == 0 {
658                // With base quality filter of 20
659                let position =
660                    PileupPosition::from_pileup(pileup, &header_view, &read_filter, Some(20));
661
662                // Depth should include both reads
663                assert_eq!(position.depth, 2, "Depth should be 2");
664
665                // 'A' from NORMAL (quality 'I' = 40, passes filter)
666                assert_eq!(
667                    position.a, 1,
668                    "Should have 1 A from high-quality NORMAL read"
669                );
670
671                // Empty SEQ read should be counted as N (regardless of base qual filter)
672                assert_eq!(position.n, 1, "Empty SEQ read should be counted as N");
673
674                break;
675            }
676        }
677    }
678
679    /// Test that reads with empty SEQ work correctly in mate-aware pileup mode.
680    #[test]
681    fn test_empty_seq_mate_aware() {
682        use rust_htslib::bam::{IndexedReader, Writer, index};
683        use tempfile::tempdir;
684
685        let tempdir = tempdir().unwrap();
686        let bam_path = tempdir.path().join("empty_seq_mate.bam");
687
688        // Create header
689        let mut header = bam::header::Header::new();
690        let mut chr1 = bam::header::HeaderRecord::new(b"SQ");
691        chr1.push_tag(b"SN", &"chr1".to_owned());
692        chr1.push_tag(b"LN", &"100".to_owned());
693        header.push_record(&chr1);
694        let view = bam::HeaderView::from_header(&header);
695
696        // Normal read
697        let normal_record = Record::from_sam(
698            &view,
699            b"NORMAL\t0\tchr1\t1\t40\t25M\t*\t0\t0\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################",
700        )
701        .unwrap();
702
703        // Read with empty SEQ
704        let empty_seq_record =
705            Record::from_sam(&view, b"EMPTY_SEQ\t0\tchr1\t1\t40\t25M\t*\t0\t0\t*\t*").unwrap();
706
707        // Write BAM file
708        let mut writer = Writer::from_path(&bam_path, &header, bam::Format::Bam).unwrap();
709        writer.write(&normal_record).unwrap();
710        writer.write(&empty_seq_record).unwrap();
711        drop(writer);
712
713        // Index the BAM file
714        index::build(&bam_path, None, index::Type::Bai, 1).unwrap();
715
716        // Read back and create pileup in mate-aware mode
717        let mut reader = IndexedReader::from_path(&bam_path).unwrap();
718        let header_view = reader.header().clone();
719
720        let read_filter = DefaultReadFilter::new(0, 0, 0);
721
722        reader.fetch(("chr1", 0, 1)).unwrap();
723        let pileup_iter = reader.pileup();
724
725        for pileup_result in pileup_iter {
726            let pileup = pileup_result.unwrap();
727            if pileup.pos() == 0 {
728                let position = PileupPosition::from_pileup_mate_aware(
729                    pileup,
730                    &header_view,
731                    &read_filter,
732                    None,
733                    MateResolutionStrategy::Original,
734                );
735
736                // Depth should include both reads
737                assert_eq!(position.depth, 2, "Depth should be 2");
738
739                // 'A' from NORMAL read
740                assert_eq!(position.a, 1, "Should have 1 A from NORMAL read");
741
742                // Empty SEQ read should be counted as N
743                assert_eq!(position.n, 1, "Empty SEQ read should be counted as N");
744
745                break;
746            }
747        }
748    }
749
750    /// Test N strategy
751    #[test]
752    fn test_n_strategy() {
753        use rust_htslib::bam::{IndexedReader, Writer, index};
754        use tempfile::tempdir;
755
756        let tempdir = tempdir().unwrap();
757        let bam_path = tempdir.path().join("test.bam");
758
759        // Create header
760        let mut header = bam::header::Header::new();
761        let mut chr1 = bam::header::HeaderRecord::new(b"SQ");
762        chr1.push_tag(b"SN", &"chr1".to_owned());
763        chr1.push_tag(b"LN", &"100".to_owned());
764        header.push_record(&chr1);
765        let view = bam::HeaderView::from_header(&header);
766
767        // Create overlapping mates with same quality but different bases
768        let records = vec![
769            Record::from_sam(
770                &view,
771                b"TESTPAIR\t67\tchr1\t10\t40\t10M\tchr1\t15\t30\tCCCCCCCCCC\t>>>>>>>>>>",
772            )
773            .unwrap(),
774            Record::from_sam(
775                &view,
776                b"TESTPAIR\t147\tchr1\t15\t40\t10M\tchr1\t10\t30\tTTTTTTTTTT\t>>>>>>>>>>",
777            )
778            .unwrap(),
779        ];
780
781        // Write BAM file
782        let mut writer = Writer::from_path(&bam_path, &header, bam::Format::Bam).unwrap();
783        for record in &records {
784            writer.write(record).unwrap();
785        }
786        drop(writer);
787
788        // Index the BAM file
789        index::build(&bam_path, None, index::Type::Bai, 1).unwrap();
790
791        // Read back and test N strategy
792        let mut reader = IndexedReader::from_path(&bam_path).unwrap();
793        let _header_view = reader.header().clone();
794
795        reader.fetch(("chr1", 19, 20)).unwrap();
796        let pileup_iter = reader.pileup();
797
798        let read_filter = DefaultReadFilter::new(0, 0, 0);
799
800        for pileup_result in pileup_iter {
801            let pileup = pileup_result.unwrap();
802            if pileup.pos() == 19 {
803                let strat = crate::position::mate_fix::MateResolutionStrategy::N;
804
805                let alns: Vec<_> = pileup
806                    .alignments()
807                    .map(|aln| {
808                        let rec = aln.record();
809                        (aln, rec)
810                    })
811                    .collect();
812
813                if alns.len() == 2 {
814                    // N strategy should always return N for different bases
815                    let result = strat.cmp(&alns[0], &alns[1], &read_filter);
816                    assert!(matches!(
817                        result.recommended_base,
818                        Some(crate::position::mate_fix::Base::N)
819                    ));
820                }
821                break;
822            }
823        }
824    }
825}