Skip to main content

fgumi_consensus/
sequence.rs

1//! Consensus sequence data structure.
2//!
3//! This module provides [`ConsensusSequence`], a wrapper type that enforces
4//! invariants for parallel vectors representing consensus base calls.
5
6/// Consensus sequence with per-base annotations.
7///
8/// This struct maintains parallel vectors for bases, quality scores, depths, and errors.
9/// All vectors are guaranteed to have the same length through the API.
10///
11/// # Layout
12///
13/// Uses a Structure-of-Arrays layout for cache-friendly access:
14/// - Sequential access to all bases is cache-efficient
15/// - Sequential access to all qualities is cache-efficient
16/// - etc.
17///
18/// # Example
19///
20/// ```
21/// use fgumi_lib::consensus::ConsensusSequence;
22///
23/// let mut seq = ConsensusSequence::new();
24/// seq.push(b'A', 30, 10, 0);
25/// seq.push(b'C', 25, 8, 1);
26///
27/// assert_eq!(seq.len(), 2);
28/// assert_eq!(seq.bases(), &[b'A', b'C']);
29/// assert_eq!(seq.quals(), &[30, 25]);
30/// ```
31#[derive(Debug, Clone, Default)]
32pub struct ConsensusSequence {
33    /// Consensus sequence bases
34    bases: Vec<u8>,
35    /// Consensus quality scores (Phred-scaled)
36    quals: Vec<u8>,
37    /// Per-base read depth
38    depths: Vec<u16>,
39    /// Per-base error count
40    errors: Vec<u16>,
41}
42
43impl ConsensusSequence {
44    /// Creates a new empty consensus sequence.
45    #[must_use]
46    pub fn new() -> Self {
47        Self::default()
48    }
49
50    /// Creates a new consensus sequence with the given capacity.
51    #[must_use]
52    pub fn with_capacity(capacity: usize) -> Self {
53        Self {
54            bases: Vec::with_capacity(capacity),
55            quals: Vec::with_capacity(capacity),
56            depths: Vec::with_capacity(capacity),
57            errors: Vec::with_capacity(capacity),
58        }
59    }
60
61    /// Creates a consensus sequence from parallel vectors.
62    ///
63    /// # Panics
64    ///
65    /// Panics if the vectors have different lengths.
66    #[must_use]
67    pub fn from_vecs(bases: Vec<u8>, quals: Vec<u8>, depths: Vec<u16>, errors: Vec<u16>) -> Self {
68        let len = bases.len();
69        assert!(
70            quals.len() == len && depths.len() == len && errors.len() == len,
71            "All vectors must have the same length: bases={}, quals={}, depths={}, errors={}",
72            len,
73            quals.len(),
74            depths.len(),
75            errors.len()
76        );
77        Self { bases, quals, depths, errors }
78    }
79
80    /// Returns the length of the consensus sequence.
81    #[must_use]
82    pub fn len(&self) -> usize {
83        self.bases.len()
84    }
85
86    /// Returns true if the consensus sequence is empty.
87    #[must_use]
88    pub fn is_empty(&self) -> bool {
89        self.bases.is_empty()
90    }
91
92    /// Appends a single base with its annotations.
93    pub fn push(&mut self, base: u8, qual: u8, depth: u16, error: u16) {
94        self.bases.push(base);
95        self.quals.push(qual);
96        self.depths.push(depth);
97        self.errors.push(error);
98    }
99
100    /// Extends this sequence with another.
101    pub fn extend(&mut self, other: &ConsensusSequence) {
102        self.bases.extend_from_slice(&other.bases);
103        self.quals.extend_from_slice(&other.quals);
104        self.depths.extend_from_slice(&other.depths);
105        self.errors.extend_from_slice(&other.errors);
106    }
107
108    /// Clears all vectors.
109    pub fn clear(&mut self) {
110        self.bases.clear();
111        self.quals.clear();
112        self.depths.clear();
113        self.errors.clear();
114    }
115
116    /// Returns the consensus bases.
117    #[must_use]
118    pub fn bases(&self) -> &[u8] {
119        &self.bases
120    }
121
122    /// Returns mutable access to the consensus bases.
123    ///
124    /// # Note
125    ///
126    /// The caller should not change the length (slices prevent this anyway).
127    #[must_use]
128    pub fn bases_mut(&mut self) -> &mut [u8] {
129        &mut self.bases
130    }
131
132    /// Returns the quality scores.
133    #[must_use]
134    pub fn quals(&self) -> &[u8] {
135        &self.quals
136    }
137
138    /// Returns mutable access to the quality scores.
139    ///
140    /// # Note
141    ///
142    /// The caller should not change the length (slices prevent this anyway).
143    #[must_use]
144    pub fn quals_mut(&mut self) -> &mut [u8] {
145        &mut self.quals
146    }
147
148    /// Returns the per-base depths.
149    #[must_use]
150    pub fn depths(&self) -> &[u16] {
151        &self.depths
152    }
153
154    /// Returns mutable access to the per-base depths.
155    ///
156    /// # Note
157    ///
158    /// The caller should not change the length (slices prevent this anyway).
159    #[must_use]
160    pub fn depths_mut(&mut self) -> &mut [u16] {
161        &mut self.depths
162    }
163
164    /// Returns the per-base error counts.
165    #[must_use]
166    pub fn errors(&self) -> &[u16] {
167        &self.errors
168    }
169
170    /// Returns mutable access to the per-base error counts.
171    ///
172    /// # Note
173    ///
174    /// The caller should not change the length (slices prevent this anyway).
175    #[must_use]
176    pub fn errors_mut(&mut self) -> &mut [u16] {
177        &mut self.errors
178    }
179
180    /// Returns the maximum depth across all positions.
181    #[must_use]
182    pub fn max_depth(&self) -> u16 {
183        self.depths.iter().copied().max().unwrap_or(0)
184    }
185
186    /// Returns the minimum depth across all positions.
187    #[must_use]
188    pub fn min_depth(&self) -> u16 {
189        self.depths.iter().copied().min().unwrap_or(0)
190    }
191
192    /// Returns the error rate (total errors / total depth).
193    #[must_use]
194    pub fn error_rate(&self) -> f32 {
195        crate::caller::calculate_error_rate(&self.depths, &self.errors)
196    }
197
198    /// Decomposes this sequence into its component vectors.
199    ///
200    /// This is useful when the caller needs ownership of the vectors.
201    #[must_use]
202    pub fn into_vecs(self) -> (Vec<u8>, Vec<u8>, Vec<u16>, Vec<u16>) {
203        (self.bases, self.quals, self.depths, self.errors)
204    }
205
206    /// Pads the sequence to a new length by adding values to the left or right.
207    ///
208    /// # Arguments
209    /// * `new_length` - Target length (must be >= current length)
210    /// * `left` - If true, pad on the left side; otherwise pad on the right
211    /// * `base` - Base to use for padding (default: 'n')
212    /// * `qual` - Quality to use for padding (default: 2)
213    ///
214    /// # Panics
215    /// Panics if `new_length` < current length
216    #[must_use]
217    pub fn padded(&self, new_length: usize, left: bool, base: u8, qual: u8) -> Self {
218        let current_len = self.bases.len();
219        assert!(
220            new_length >= current_len,
221            "new_length ({new_length}) must be >= current length ({current_len})"
222        );
223
224        if new_length == current_len {
225            return self.clone();
226        }
227
228        let pad_len = new_length - current_len;
229
230        // Pre-allocate with exact capacity (avoids intermediate allocations)
231        let mut bases = Vec::with_capacity(new_length);
232        let mut quals = Vec::with_capacity(new_length);
233        let mut depths = Vec::with_capacity(new_length);
234        let mut errors = Vec::with_capacity(new_length);
235
236        if left {
237            // Padding on left: [padding][original]
238            bases.resize(pad_len, base);
239            quals.resize(pad_len, qual);
240            depths.resize(pad_len, 0);
241            errors.resize(pad_len, 0);
242
243            bases.extend_from_slice(&self.bases);
244            quals.extend_from_slice(&self.quals);
245            depths.extend_from_slice(&self.depths);
246            errors.extend_from_slice(&self.errors);
247        } else {
248            // Padding on right: [original][padding]
249            bases.extend_from_slice(&self.bases);
250            quals.extend_from_slice(&self.quals);
251            depths.extend_from_slice(&self.depths);
252            errors.extend_from_slice(&self.errors);
253
254            bases.resize(new_length, base);
255            quals.resize(new_length, qual);
256            depths.resize(new_length, 0);
257            errors.resize(new_length, 0);
258        }
259
260        Self { bases, quals, depths, errors }
261    }
262}
263
264#[cfg(test)]
265mod tests {
266    use super::*;
267
268    #[test]
269    fn test_new_is_empty() {
270        let seq = ConsensusSequence::new();
271        assert!(seq.is_empty());
272        assert_eq!(seq.len(), 0);
273    }
274
275    #[test]
276    fn test_push_and_accessors() {
277        let mut seq = ConsensusSequence::new();
278        seq.push(b'A', 30, 10, 0);
279        seq.push(b'C', 25, 8, 1);
280        seq.push(b'G', 20, 5, 2);
281
282        assert_eq!(seq.len(), 3);
283        assert!(!seq.is_empty());
284        assert_eq!(seq.bases(), b"ACG");
285        assert_eq!(seq.quals(), &[30, 25, 20]);
286        assert_eq!(seq.depths(), &[10, 8, 5]);
287        assert_eq!(seq.errors(), &[0, 1, 2]);
288    }
289
290    #[test]
291    fn test_from_vecs() {
292        let seq =
293            ConsensusSequence::from_vecs(vec![b'A', b'T'], vec![30, 25], vec![10, 8], vec![0, 1]);
294
295        assert_eq!(seq.len(), 2);
296        assert_eq!(seq.bases(), b"AT");
297    }
298
299    #[test]
300    #[should_panic(expected = "All vectors must have the same length")]
301    fn test_from_vecs_mismatched_lengths() {
302        let _ = ConsensusSequence::from_vecs(
303            vec![b'A', b'T'],
304            vec![30], // Wrong length
305            vec![10, 8],
306            vec![0, 1],
307        );
308    }
309
310    #[test]
311    fn test_extend() {
312        let mut seq1 = ConsensusSequence::from_vecs(vec![b'A'], vec![30], vec![10], vec![0]);
313        let seq2 =
314            ConsensusSequence::from_vecs(vec![b'T', b'C'], vec![25, 20], vec![8, 5], vec![1, 0]);
315
316        seq1.extend(&seq2);
317        assert_eq!(seq1.len(), 3);
318        assert_eq!(seq1.bases(), b"ATC");
319    }
320
321    #[test]
322    fn test_max_min_depth() {
323        let seq = ConsensusSequence::from_vecs(
324            vec![b'A', b'T', b'C'],
325            vec![30, 25, 20],
326            vec![10, 5, 15],
327            vec![0, 0, 0],
328        );
329
330        assert_eq!(seq.max_depth(), 15);
331        assert_eq!(seq.min_depth(), 5);
332    }
333
334    #[test]
335    fn test_error_rate() {
336        let seq = ConsensusSequence::from_vecs(
337            vec![b'A', b'T', b'C', b'G'],
338            vec![30, 30, 30, 30],
339            vec![10, 10, 10, 10], // Total depth = 40
340            vec![0, 1, 1, 2],     // Total errors = 4
341        );
342
343        // Error rate = 4/40 = 0.1
344        assert!((seq.error_rate() - 0.1).abs() < 0.001);
345    }
346
347    #[test]
348    fn test_error_rate_zero_depth() {
349        let seq = ConsensusSequence::new();
350        assert!((seq.error_rate() - 0.0).abs() < f32::EPSILON);
351    }
352
353    #[test]
354    fn test_into_vecs() {
355        let seq =
356            ConsensusSequence::from_vecs(vec![b'A', b'T'], vec![30, 25], vec![10, 8], vec![0, 1]);
357
358        let (bases, quals, depths, errors) = seq.into_vecs();
359        assert_eq!(bases, vec![b'A', b'T']);
360        assert_eq!(quals, vec![30, 25]);
361        assert_eq!(depths, vec![10, 8]);
362        assert_eq!(errors, vec![0, 1]);
363    }
364
365    #[test]
366    fn test_padded_right() {
367        let seq =
368            ConsensusSequence::from_vecs(vec![b'A', b'C'], vec![30, 25], vec![10, 8], vec![0, 1]);
369
370        let padded = seq.padded(4, false, b'N', 2);
371        assert_eq!(padded.len(), 4);
372        assert_eq!(padded.bases(), b"ACNN");
373        assert_eq!(padded.quals(), &[30, 25, 2, 2]);
374        assert_eq!(padded.depths(), &[10, 8, 0, 0]);
375    }
376
377    #[test]
378    fn test_padded_left() {
379        let seq =
380            ConsensusSequence::from_vecs(vec![b'A', b'C'], vec![30, 25], vec![10, 8], vec![0, 1]);
381
382        let padded = seq.padded(4, true, b'N', 2);
383        assert_eq!(padded.len(), 4);
384        assert_eq!(padded.bases(), b"NNAC");
385        assert_eq!(padded.quals(), &[2, 2, 30, 25]);
386    }
387
388    #[test]
389    fn test_padded_same_length() {
390        let seq =
391            ConsensusSequence::from_vecs(vec![b'A', b'C'], vec![30, 25], vec![10, 8], vec![0, 1]);
392
393        let padded = seq.padded(2, false, b'N', 2);
394        assert_eq!(padded.len(), 2);
395        assert_eq!(padded.bases(), seq.bases());
396    }
397
398    #[test]
399    fn test_clear() {
400        let mut seq =
401            ConsensusSequence::from_vecs(vec![b'A', b'C'], vec![30, 25], vec![10, 8], vec![0, 1]);
402
403        seq.clear();
404        assert!(seq.is_empty());
405        assert_eq!(seq.len(), 0);
406    }
407
408    #[test]
409    fn test_with_capacity() {
410        let seq = ConsensusSequence::with_capacity(100);
411        assert!(seq.is_empty());
412        // Capacity is an implementation detail, but we can verify the struct is usable
413    }
414
415    // =====================================================================
416    // Tests for mutable accessors
417    // =====================================================================
418
419    #[test]
420    fn test_bases_mut() {
421        let mut seq =
422            ConsensusSequence::from_vecs(vec![b'A', b'C'], vec![30, 25], vec![10, 8], vec![0, 1]);
423
424        seq.bases_mut()[0] = b'T';
425        assert_eq!(seq.bases(), b"TC");
426    }
427
428    #[test]
429    fn test_quals_mut() {
430        let mut seq =
431            ConsensusSequence::from_vecs(vec![b'A', b'C'], vec![30, 25], vec![10, 8], vec![0, 1]);
432
433        seq.quals_mut()[1] = 40;
434        assert_eq!(seq.quals(), &[30, 40]);
435    }
436
437    #[test]
438    fn test_depths_mut() {
439        let mut seq =
440            ConsensusSequence::from_vecs(vec![b'A', b'C'], vec![30, 25], vec![10, 8], vec![0, 1]);
441
442        seq.depths_mut()[0] = 20;
443        assert_eq!(seq.depths(), &[20, 8]);
444    }
445
446    #[test]
447    fn test_errors_mut() {
448        let mut seq =
449            ConsensusSequence::from_vecs(vec![b'A', b'C'], vec![30, 25], vec![10, 8], vec![0, 1]);
450
451        seq.errors_mut()[1] = 3;
452        assert_eq!(seq.errors(), &[0, 3]);
453    }
454
455    #[test]
456    fn test_default_is_empty() {
457        let seq = ConsensusSequence::default();
458        assert!(seq.is_empty());
459        assert_eq!(seq.len(), 0);
460        assert_eq!(seq.max_depth(), 0);
461        assert_eq!(seq.min_depth(), 0);
462    }
463
464    // =====================================================================
465    // Tests for padded edge cases
466    // =====================================================================
467
468    #[test]
469    #[should_panic(expected = "must be >= current length")]
470    fn test_padded_too_short_panics() {
471        let seq = ConsensusSequence::from_vecs(
472            vec![b'A', b'C', b'G'],
473            vec![30, 25, 20],
474            vec![10, 8, 5],
475            vec![0, 1, 0],
476        );
477        let _ = seq.padded(2, false, b'N', 2);
478    }
479
480    #[test]
481    fn test_padded_left_preserves_errors() {
482        let seq =
483            ConsensusSequence::from_vecs(vec![b'A', b'C'], vec![30, 25], vec![10, 8], vec![2, 3]);
484
485        let padded = seq.padded(4, true, b'N', 2);
486        assert_eq!(padded.errors(), &[0, 0, 2, 3]);
487        assert_eq!(padded.depths(), &[0, 0, 10, 8]);
488    }
489
490    // =====================================================================
491    // Tests for clone and extend
492    // =====================================================================
493
494    #[test]
495    fn test_clone() {
496        let seq =
497            ConsensusSequence::from_vecs(vec![b'A', b'C'], vec![30, 25], vec![10, 8], vec![0, 1]);
498        let cloned = seq.clone();
499        assert_eq!(cloned.bases(), seq.bases());
500        assert_eq!(cloned.quals(), seq.quals());
501        assert_eq!(cloned.depths(), seq.depths());
502        assert_eq!(cloned.errors(), seq.errors());
503    }
504
505    #[test]
506    fn test_extend_empty_into_existing() {
507        let mut seq =
508            ConsensusSequence::from_vecs(vec![b'A', b'C'], vec![30, 25], vec![10, 8], vec![0, 1]);
509        let empty = ConsensusSequence::new();
510        seq.extend(&empty);
511        assert_eq!(seq.len(), 2);
512        assert_eq!(seq.bases(), b"AC");
513    }
514
515    #[test]
516    fn test_extend_into_empty() {
517        let mut empty = ConsensusSequence::new();
518        let seq =
519            ConsensusSequence::from_vecs(vec![b'A', b'C'], vec![30, 25], vec![10, 8], vec![0, 1]);
520        empty.extend(&seq);
521        assert_eq!(empty.len(), 2);
522        assert_eq!(empty.bases(), b"AC");
523    }
524}