Skip to main content

fgumi_lib/
variant_review.rs

1//! Support for reviewing consensus variants
2//!
3//! This module provides structures and utilities for reviewing variant calls
4//! from consensus reads, including tracking base counts and pileup information.
5
6use crate::phred::NO_CALL_BASE;
7use serde::Serialize;
8
9/// Detailed information about a consensus read carrying a variant.
10///
11/// Each row contains information about the variant site (repeated for each
12/// consensus read) and specific information about that consensus read.
13#[derive(Debug, Clone, Serialize)]
14pub struct ConsensusVariantReviewInfo {
15    // Variant site information (same for all reads at this position)
16    /// Chromosome/contig name
17    pub chrom: String,
18    /// Position of the variant (1-based)
19    pub pos: i32,
20    /// Reference allele
21    pub ref_allele: String,
22    /// Genotype string (if available from VCF)
23    pub genotype: String,
24    /// Comma-separated list of filters (or "PASS")
25    pub filters: String,
26
27    // Consensus-level counts (same for all reads at this position)
28    /// Count of A observations at this position across all consensus reads
29    #[serde(rename = "A")]
30    pub consensus_a: usize,
31    /// Count of C observations
32    #[serde(rename = "C")]
33    pub consensus_c: usize,
34    /// Count of G observations
35    #[serde(rename = "G")]
36    pub consensus_g: usize,
37    /// Count of T observations
38    #[serde(rename = "T")]
39    pub consensus_t: usize,
40    /// Count of N observations
41    #[serde(rename = "N")]
42    pub consensus_n: usize,
43
44    // Per-consensus-read information
45    /// Consensus read name
46    pub consensus_read: String,
47    /// Insert description (chr:start-end | orientation)
48    pub consensus_insert: String,
49    /// Base call from the consensus read
50    pub consensus_call: char,
51    /// Quality score from the consensus read
52    pub consensus_qual: u8,
53
54    // Raw read counts supporting this consensus base
55    /// Number of As in raw reads supporting this consensus base
56    pub a: usize,
57    /// Number of Cs in raw reads
58    pub c: usize,
59    /// Number of Gs in raw reads
60    pub g: usize,
61    /// Number of Ts in raw reads
62    pub t: usize,
63    /// Number of Ns in raw reads
64    pub n: usize,
65}
66
67/// Represents a variant position
68#[derive(Debug, Clone)]
69pub struct Variant {
70    pub chrom: String,
71    pub pos: i32,
72    pub ref_base: char,
73    pub genotype: Option<String>,
74    pub filters: Option<String>,
75}
76
77impl Variant {
78    #[must_use]
79    pub fn new(chrom: String, pos: i32, ref_base: char) -> Self {
80        Self { chrom, pos, ref_base, genotype: None, filters: None }
81    }
82
83    #[must_use]
84    pub fn with_genotype(mut self, genotype: String) -> Self {
85        self.genotype = Some(genotype);
86        self
87    }
88
89    #[must_use]
90    pub fn with_filters(mut self, filters: String) -> Self {
91        self.filters = Some(filters);
92        self
93    }
94}
95
96/// Base counts at a position
97#[derive(Debug, Default, Clone)]
98pub struct BaseCounts {
99    pub a: usize,
100    pub c: usize,
101    pub g: usize,
102    pub t: usize,
103    pub n: usize,
104}
105
106impl BaseCounts {
107    pub fn add_base(&mut self, base: u8) {
108        match base.to_ascii_uppercase() {
109            b'A' => self.a += 1,
110            b'C' => self.c += 1,
111            b'G' => self.g += 1,
112            b'T' => self.t += 1,
113            NO_CALL_BASE => self.n += 1,
114            _ => {}
115        }
116    }
117
118    #[must_use]
119    pub fn total(&self) -> usize {
120        self.a + self.c + self.g + self.t + self.n
121    }
122}
123
124/// Generates a /1 or /2 suffix based on read pair information
125#[must_use]
126pub fn read_number_suffix(is_first_of_pair: bool) -> &'static str {
127    if is_first_of_pair { "/1" } else { "/2" }
128}
129
130/// Generates an insert string for a consensus read
131///
132/// Returns a string like "chr1:100-200 | F1R2" for FR pairs, or "NA" for non-FR pairs
133#[must_use]
134#[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
135pub fn format_insert_string(
136    record: &noodles::sam::alignment::RecordBuf,
137    header: &noodles::sam::Header,
138) -> String {
139    let flags = record.flags();
140
141    // Only generate strings for FR pairs, all other reads get "NA"
142    // Must be paired, both reads mapped, and on same reference with FR orientation
143    if !flags.is_segmented() {
144        return "NA".to_string();
145    }
146
147    if flags.is_unmapped() || flags.is_mate_unmapped() {
148        return "NA".to_string();
149    }
150
151    // Check that mate is on same reference
152    let Some(ref_idx) = record.reference_sequence_id() else {
153        return "NA".to_string();
154    };
155
156    let Some(mate_ref_idx) = record.mate_reference_sequence_id() else {
157        return "NA".to_string();
158    };
159
160    if ref_idx != mate_ref_idx {
161        return "NA".to_string();
162    }
163
164    // Check for FR orientation: one forward, one reverse
165    let is_reverse = flags.is_reverse_complemented();
166    let mate_is_reverse = flags.is_mate_reverse_complemented();
167
168    if is_reverse == mate_is_reverse {
169        return "NA".to_string(); // Both same orientation (FF or RR)
170    }
171
172    // Get reference sequence name
173    let ref_name = match record.reference_sequence(header) {
174        Some(Ok((name, _))) => String::from_utf8_lossy(name).to_string(),
175        _ => return "NA".to_string(),
176    };
177
178    // Calculate outer position based on Scala logic:
179    // val outer = if (r.negativeStrand) r.end else r.start
180    let outer = if is_reverse {
181        // If negative strand, use alignment end
182        match record.alignment_end() {
183            Some(pos) => usize::from(pos) as i32,
184            None => return "NA".to_string(),
185        }
186    } else {
187        // If positive strand, use alignment start
188        match record.alignment_start() {
189            Some(pos) => usize::from(pos) as i32,
190            None => return "NA".to_string(),
191        }
192    };
193
194    // Get insert size (signed)
195    let isize = record.template_length();
196
197    // Calculate start and end coordinates using Scala logic:
198    // val Seq(start, end) = Seq(outer, outer + isize + (if (isize < 0) 1 else -1)).sorted
199    let other_coord = outer + isize + if isize < 0 { 1 } else { -1 };
200    let (start, end) =
201        if outer < other_coord { (outer, other_coord) } else { (other_coord, outer) };
202
203    // Determine pairing using Scala logic:
204    // val pairing = (r.firstOfPair, start == outer) match {
205    //   case (true, true)   => "F1R2"
206    //   case (true, false)  => "F2R1"
207    //   case (false, true)  => "F2R1"
208    //   case (false, false) => "F1R2"
209    // }
210    let is_first = flags.is_first_segment();
211    let start_equals_outer = start == outer;
212
213    let pairing = match (is_first, start_equals_outer) {
214        (true, true) | (false, false) => "F1R2",
215        (true, false) | (false, true) => "F2R1",
216    };
217
218    format!("{ref_name}:{start}-{end} | {pairing}")
219}
220
221#[cfg(test)]
222mod tests {
223    use super::*;
224    use crate::sam::builder::RecordBuilder;
225    use noodles::sam::Header;
226    use noodles::sam::alignment::RecordBuf;
227
228    #[test]
229    fn test_read_number_suffix() {
230        assert_eq!(read_number_suffix(true), "/1");
231        assert_eq!(read_number_suffix(false), "/2");
232    }
233
234    #[test]
235    fn test_format_insert_string_unpaired() {
236        // Create a simple SAM header
237        let header = Header::builder().build();
238
239        // Create an unpaired read (no segmented flag)
240        let record = RecordBuilder::new().sequence("ACGT").build();
241
242        let result = format_insert_string(&record, &header);
243        assert_eq!(result, "NA");
244    }
245
246    #[test]
247    fn test_base_counts_new() {
248        let counts = BaseCounts::default();
249        assert_eq!(counts.a, 0);
250        assert_eq!(counts.c, 0);
251        assert_eq!(counts.g, 0);
252        assert_eq!(counts.t, 0);
253        assert_eq!(counts.n, 0);
254        assert_eq!(counts.total(), 0);
255    }
256
257    #[test]
258    fn test_base_counts_add_base() {
259        let mut counts = BaseCounts::default();
260
261        counts.add_base(b'A');
262        assert_eq!(counts.a, 1);
263        assert_eq!(counts.total(), 1);
264
265        counts.add_base(b'a'); // Test lowercase
266        assert_eq!(counts.a, 2);
267
268        counts.add_base(b'C');
269        counts.add_base(b'G');
270        counts.add_base(b'T');
271        counts.add_base(b'N');
272
273        assert_eq!(counts.c, 1);
274        assert_eq!(counts.g, 1);
275        assert_eq!(counts.t, 1);
276        assert_eq!(counts.n, 1);
277        assert_eq!(counts.total(), 6);
278    }
279
280    #[test]
281    fn test_base_counts_ignore_non_bases() {
282        let mut counts = BaseCounts::default();
283
284        counts.add_base(b'X');
285        counts.add_base(b'-');
286        counts.add_base(b'.');
287
288        assert_eq!(counts.total(), 0);
289    }
290
291    #[test]
292    fn test_variant_new() {
293        let variant = Variant::new("chr1".to_string(), 100, 'A');
294        assert_eq!(variant.chrom, "chr1");
295        assert_eq!(variant.pos, 100);
296        assert_eq!(variant.ref_base, 'A');
297        assert!(variant.genotype.is_none());
298        assert!(variant.filters.is_none());
299    }
300
301    #[test]
302    fn test_variant_with_genotype_and_filters() {
303        let variant = Variant::new("chr1".to_string(), 100, 'A')
304            .with_genotype("0/1".to_string())
305            .with_filters("PASS".to_string());
306
307        assert_eq!(variant.genotype, Some("0/1".to_string()));
308        assert_eq!(variant.filters, Some("PASS".to_string()));
309    }
310
311    // Helper to create a test header with chr1
312    fn create_test_header() -> Header {
313        use bstr::BString;
314        use noodles::sam::header::record::value::{Map, map::ReferenceSequence};
315        use std::num::NonZeroUsize;
316
317        Header::builder()
318            .add_reference_sequence(
319                BString::from("chr1"),
320                Map::<ReferenceSequence>::new(NonZeroUsize::try_from(1000).unwrap()),
321            )
322            .build()
323    }
324
325    // Helper to create a paired read with specific parameters
326    fn create_paired_read(
327        start: usize,
328        mate_start: usize,
329        is_reverse: bool,
330        mate_is_reverse: bool,
331        is_first: bool,
332        template_len: i32,
333    ) -> RecordBuf {
334        RecordBuilder::new()
335            .sequence(&"A".repeat(10))
336            .cigar("10M")
337            .reference_sequence_id(0)
338            .alignment_start(start)
339            .mate_reference_sequence_id(0)
340            .mate_alignment_start(mate_start)
341            .template_length(template_len)
342            .first_segment(is_first)
343            .reverse_complement(is_reverse)
344            .mate_reverse_complement(mate_is_reverse)
345            .build()
346    }
347
348    #[test]
349    fn test_format_insert_string_unmapped_returns_na() {
350        use noodles::sam::alignment::record::Flags;
351
352        let header = create_test_header();
353        let mut record =
354            RecordBuilder::new().sequence("ACGT").first_segment(true).unmapped(true).build();
355
356        // Ensure SEGMENTED | UNMAPPED flags are set
357        *record.flags_mut() = Flags::SEGMENTED | Flags::UNMAPPED;
358
359        let result = format_insert_string(&record, &header);
360        assert_eq!(result, "NA");
361    }
362
363    #[test]
364    fn test_format_insert_string_mate_unmapped_returns_na() {
365        let header = create_test_header();
366        let mut record = create_paired_read(100, 200, false, true, true, 101);
367
368        // Set mate as unmapped
369        *record.flags_mut() |= noodles::sam::alignment::record::Flags::MATE_UNMAPPED;
370
371        let result = format_insert_string(&record, &header);
372        assert_eq!(result, "NA");
373    }
374
375    #[test]
376    fn test_format_insert_string_same_orientation_returns_na() {
377        let header = create_test_header();
378
379        // Both forward (FF)
380        let record = create_paired_read(100, 200, false, false, true, 101);
381        let result = format_insert_string(&record, &header);
382        assert_eq!(result, "NA");
383
384        // Both reverse (RR)
385        let record = create_paired_read(100, 200, true, true, true, -101);
386        let result = format_insert_string(&record, &header);
387        assert_eq!(result, "NA");
388    }
389
390    #[test]
391    fn test_format_insert_string_cross_contig_returns_na() {
392        let header = create_test_header();
393        let mut record = create_paired_read(100, 200, false, true, true, 101);
394
395        // Set mate on different reference
396        *record.mate_reference_sequence_id_mut() = Some(1);
397
398        let result = format_insert_string(&record, &header);
399        assert_eq!(result, "NA");
400    }
401
402    #[test]
403    fn test_format_insert_string_fr_pair_f1r2() {
404        let header = create_test_header();
405
406        // First read forward (start=100), second read reverse (start=191)
407        // Template length = 101 (positive for forward read)
408        // Expected: chr1:100-200 | F1R2
409        let record = create_paired_read(100, 191, false, true, true, 101);
410        let result = format_insert_string(&record, &header);
411        assert_eq!(result, "chr1:100-200 | F1R2");
412    }
413
414    #[test]
415    fn test_format_insert_string_fr_pair_f2r1() {
416        let header = create_test_header();
417
418        // First read reverse (start=191), second read forward (start=100)
419        // Template length = -101 (negative for reverse read)
420        // Expected: chr1:100-200 | F2R1
421        let record = create_paired_read(191, 100, true, false, true, -101);
422        let result = format_insert_string(&record, &header);
423        assert_eq!(result, "chr1:100-200 | F2R1");
424    }
425
426    #[test]
427    fn test_format_insert_string_second_of_pair_forward() {
428        let header = create_test_header();
429
430        // Second read forward (start=100), first read reverse (start=191)
431        // is_first=false, forward orientation
432        // Expected: chr1:100-200 | F2R1
433        let record = create_paired_read(100, 191, false, true, false, 101);
434        let result = format_insert_string(&record, &header);
435        assert_eq!(result, "chr1:100-200 | F2R1");
436    }
437
438    #[test]
439    fn test_format_insert_string_second_of_pair_reverse() {
440        let header = create_test_header();
441
442        // Second read reverse (start=191), first read forward (start=100)
443        // is_first=false, reverse orientation
444        // Expected: chr1:100-200 | F1R2
445        let record = create_paired_read(191, 100, true, false, false, -101);
446        let result = format_insert_string(&record, &header);
447        assert_eq!(result, "chr1:100-200 | F1R2");
448    }
449}