Skip to main content

fgumi_metrics/
clip.rs

1//! Clipping metrics for the `clip` command.
2//!
3//! This module provides structures and functionality for collecting detailed
4//! statistics about clipping operations performed by `clip`.
5
6use noodles::sam::alignment::record::Cigar;
7use noodles::sam::alignment::record_buf::RecordBuf;
8use serde::{Deserialize, Serialize};
9
10use crate::Metric;
11
12/// Bundled counts for a single clipping operation.
13#[derive(Debug, Clone, Copy, Default)]
14pub struct ClipCounts {
15    /// Number of bases clipped before `clip`
16    pub prior: usize,
17    /// Number of bases clipped from 5' end
18    pub five_prime: usize,
19    /// Number of bases clipped from 3' end
20    pub three_prime: usize,
21    /// Number of bases clipped due to overlap
22    pub overlapping: usize,
23    /// Number of bases clipped due to extending past mate
24    pub extending: usize,
25}
26
27/// Type of read for metrics tracking
28#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
29pub enum ReadType {
30    Fragment,
31    ReadOne,
32    ReadTwo,
33    Pair,
34    All,
35}
36
37/// Clipping metrics for a specific read type
38#[derive(Debug, Clone, Serialize, Deserialize)]
39pub struct ClippingMetrics {
40    /// The type of read this metric applies to
41    pub read_type: ReadType,
42
43    /// Total number of reads examined
44    pub reads: usize,
45
46    /// Number of reads that became unmapped due to clipping
47    pub reads_unmapped: usize,
48
49    /// Number of reads with any clipping before `clip`
50    pub reads_clipped_pre: usize,
51
52    /// Number of reads with any clipping after `clip`
53    pub reads_clipped_post: usize,
54
55    /// Number of reads clipped on 5' end
56    pub reads_clipped_five_prime: usize,
57
58    /// Number of reads clipped on 3' end
59    pub reads_clipped_three_prime: usize,
60
61    /// Number of reads clipped due to overlapping reads
62    pub reads_clipped_overlapping: usize,
63
64    /// Number of reads clipped due to extending past mate
65    pub reads_clipped_extending: usize,
66
67    /// Total number of aligned bases after clipping
68    pub bases: usize,
69
70    /// Number of bases clipped before `clip`
71    pub bases_clipped_pre: usize,
72
73    /// Number of bases clipped after `clip`
74    pub bases_clipped_post: usize,
75
76    /// Number of bases clipped on 5' end
77    pub bases_clipped_five_prime: usize,
78
79    /// Number of bases clipped on 3' end
80    pub bases_clipped_three_prime: usize,
81
82    /// Number of bases clipped due to overlapping reads
83    pub bases_clipped_overlapping: usize,
84
85    /// Number of bases clipped due to extending past mate
86    pub bases_clipped_extending: usize,
87}
88
89impl ClippingMetrics {
90    /// Creates a new `ClippingMetrics` for the given read type
91    #[must_use]
92    pub fn new(read_type: ReadType) -> Self {
93        Self {
94            read_type,
95            reads: 0,
96            reads_unmapped: 0,
97            reads_clipped_pre: 0,
98            reads_clipped_post: 0,
99            reads_clipped_five_prime: 0,
100            reads_clipped_three_prime: 0,
101            reads_clipped_overlapping: 0,
102            reads_clipped_extending: 0,
103            bases: 0,
104            bases_clipped_pre: 0,
105            bases_clipped_post: 0,
106            bases_clipped_five_prime: 0,
107            bases_clipped_three_prime: 0,
108            bases_clipped_overlapping: 0,
109            bases_clipped_extending: 0,
110        }
111    }
112
113    /// Updates metrics based on a clipping operation
114    ///
115    /// # Arguments
116    /// * `record` - The record that was clipped
117    /// * `counts` - The clip counts for this operation
118    pub fn update(&mut self, record: &RecordBuf, counts: ClipCounts) {
119        self.reads += 1;
120
121        // Count aligned bases
122        let cigar = record.cigar();
123        #[allow(clippy::redundant_closure_for_method_calls)]
124        // clippy suggests incorrect path for Op::len
125        let aligned_bases: usize = cigar
126            .iter()
127            .filter_map(Result::ok)
128            .filter(|op| {
129                use noodles::sam::alignment::record::cigar::op::Kind;
130                matches!(op.kind(), Kind::Match | Kind::SequenceMatch | Kind::SequenceMismatch)
131            })
132            .map(|op| op.len())
133            .sum();
134        self.bases += aligned_bases;
135
136        // Track pre-clipping
137        if counts.prior > 0 {
138            self.reads_clipped_pre += 1;
139            self.bases_clipped_pre += counts.prior;
140        }
141
142        // Track 5' clipping
143        if counts.five_prime > 0 {
144            self.reads_clipped_five_prime += 1;
145            self.bases_clipped_five_prime += counts.five_prime;
146        }
147
148        // Track 3' clipping
149        if counts.three_prime > 0 {
150            self.reads_clipped_three_prime += 1;
151            self.bases_clipped_three_prime += counts.three_prime;
152        }
153
154        // Track overlapping clipping
155        if counts.overlapping > 0 {
156            self.reads_clipped_overlapping += 1;
157            self.bases_clipped_overlapping += counts.overlapping;
158        }
159
160        // Track extending clipping
161        if counts.extending > 0 {
162            self.reads_clipped_extending += 1;
163            self.bases_clipped_extending += counts.extending;
164        }
165
166        // Total clipping after ClipBam
167        let additional_clipped =
168            counts.five_prime + counts.three_prime + counts.overlapping + counts.extending;
169        let total_clipped = additional_clipped + counts.prior;
170
171        if total_clipped > 0 {
172            self.reads_clipped_post += 1;
173            self.bases_clipped_post += total_clipped;
174
175            // Check if read became unmapped
176            if record.flags().is_unmapped() && additional_clipped > 0 {
177                self.reads_unmapped += 1;
178            }
179        }
180    }
181
182    /// Adds metrics from another `ClippingMetrics` instance
183    pub fn add(&mut self, other: &ClippingMetrics) {
184        *self += other;
185    }
186}
187
188impl std::ops::AddAssign<&ClippingMetrics> for ClippingMetrics {
189    fn add_assign(&mut self, other: &ClippingMetrics) {
190        self.reads += other.reads;
191        self.reads_unmapped += other.reads_unmapped;
192        self.reads_clipped_pre += other.reads_clipped_pre;
193        self.reads_clipped_post += other.reads_clipped_post;
194        self.reads_clipped_five_prime += other.reads_clipped_five_prime;
195        self.reads_clipped_three_prime += other.reads_clipped_three_prime;
196        self.reads_clipped_overlapping += other.reads_clipped_overlapping;
197        self.reads_clipped_extending += other.reads_clipped_extending;
198        self.bases += other.bases;
199        self.bases_clipped_pre += other.bases_clipped_pre;
200        self.bases_clipped_post += other.bases_clipped_post;
201        self.bases_clipped_five_prime += other.bases_clipped_five_prime;
202        self.bases_clipped_three_prime += other.bases_clipped_three_prime;
203        self.bases_clipped_overlapping += other.bases_clipped_overlapping;
204        self.bases_clipped_extending += other.bases_clipped_extending;
205    }
206}
207
208impl Default for ClippingMetrics {
209    fn default() -> Self {
210        Self::new(ReadType::All)
211    }
212}
213
214impl Metric for ClippingMetrics {
215    fn metric_name() -> &'static str {
216        "clipping"
217    }
218}
219
220/// Collection of clipping metrics for all read types
221pub struct ClippingMetricsCollection {
222    pub fragment: ClippingMetrics,
223    pub read_one: ClippingMetrics,
224    pub read_two: ClippingMetrics,
225    pub pair: ClippingMetrics,
226    pub all: ClippingMetrics,
227}
228
229impl ClippingMetricsCollection {
230    /// Creates a new metrics collection
231    #[must_use]
232    pub fn new() -> Self {
233        Self {
234            fragment: ClippingMetrics::new(ReadType::Fragment),
235            read_one: ClippingMetrics::new(ReadType::ReadOne),
236            read_two: ClippingMetrics::new(ReadType::ReadTwo),
237            pair: ClippingMetrics::new(ReadType::Pair),
238            all: ClippingMetrics::new(ReadType::All),
239        }
240    }
241
242    /// Finalizes metrics by aggregating pair and all categories
243    pub fn finalize(&mut self) {
244        // Aggregate ReadOne and ReadTwo into Pair
245        self.pair.add(&self.read_one);
246        self.pair.add(&self.read_two);
247
248        // Aggregate Fragment and Pair into All
249        self.all.add(&self.fragment);
250        self.all.add(&self.pair);
251    }
252
253    /// Returns all metrics in order
254    #[must_use]
255    pub fn all_metrics(&self) -> [&ClippingMetrics; 5] {
256        [&self.fragment, &self.read_one, &self.read_two, &self.pair, &self.all]
257    }
258}
259
260impl Default for ClippingMetricsCollection {
261    fn default() -> Self {
262        Self::new()
263    }
264}
265
266#[cfg(test)]
267mod tests {
268    use super::*;
269    use fgumi_sam::builder::RecordBuilder;
270
271    /// Creates a test record with the given CIGAR string.
272    /// Sequence and qualities are auto-generated from CIGAR length.
273    fn create_test_record(cigar: &str, is_unmapped: bool) -> RecordBuf {
274        RecordBuilder::new().cigar(cigar).unmapped(is_unmapped).build()
275    }
276
277    #[test]
278    fn test_read_type_variants() {
279        assert_eq!(ReadType::Fragment, ReadType::Fragment);
280        assert_ne!(ReadType::Fragment, ReadType::ReadOne);
281        assert_ne!(ReadType::ReadOne, ReadType::ReadTwo);
282    }
283
284    #[test]
285    fn test_clipping_metrics_new() {
286        let metrics = ClippingMetrics::new(ReadType::Fragment);
287        assert_eq!(metrics.reads, 0);
288        assert_eq!(metrics.reads_unmapped, 0);
289        assert_eq!(metrics.bases, 0);
290        assert_eq!(metrics.bases_clipped_pre, 0);
291    }
292
293    #[test]
294    fn test_clip_counts_default() {
295        let counts = ClipCounts::default();
296        assert_eq!(counts.prior, 0);
297        assert_eq!(counts.five_prime, 0);
298        assert_eq!(counts.three_prime, 0);
299        assert_eq!(counts.overlapping, 0);
300        assert_eq!(counts.extending, 0);
301    }
302
303    #[test]
304    fn test_clipping_metrics_update_no_clipping() {
305        let mut metrics = ClippingMetrics::new(ReadType::ReadOne);
306        let record = create_test_record("100M", false);
307
308        metrics.update(&record, ClipCounts::default());
309
310        assert_eq!(metrics.reads, 1);
311        assert_eq!(metrics.bases, 100);
312        assert_eq!(metrics.reads_clipped_pre, 0);
313        assert_eq!(metrics.reads_clipped_post, 0);
314    }
315
316    #[test]
317    fn test_clipping_metrics_update_with_prior_clipping() {
318        let mut metrics = ClippingMetrics::new(ReadType::ReadOne);
319        let record = create_test_record("90M", false);
320
321        metrics.update(&record, ClipCounts { prior: 10, ..ClipCounts::default() });
322
323        assert_eq!(metrics.reads, 1);
324        assert_eq!(metrics.reads_clipped_pre, 1);
325        assert_eq!(metrics.bases_clipped_pre, 10);
326        assert_eq!(metrics.reads_clipped_post, 1);
327        assert_eq!(metrics.bases_clipped_post, 10);
328    }
329
330    #[test]
331    fn test_clipping_metrics_update_five_prime() {
332        let mut metrics = ClippingMetrics::new(ReadType::ReadOne);
333        let record = create_test_record("95M", false);
334
335        metrics.update(&record, ClipCounts { five_prime: 5, ..ClipCounts::default() });
336
337        assert_eq!(metrics.reads_clipped_five_prime, 1);
338        assert_eq!(metrics.bases_clipped_five_prime, 5);
339        assert_eq!(metrics.reads_clipped_post, 1);
340        assert_eq!(metrics.bases_clipped_post, 5);
341    }
342
343    #[test]
344    fn test_clipping_metrics_update_three_prime() {
345        let mut metrics = ClippingMetrics::new(ReadType::ReadOne);
346        let record = create_test_record("97M", false);
347
348        metrics.update(&record, ClipCounts { three_prime: 3, ..ClipCounts::default() });
349
350        assert_eq!(metrics.reads_clipped_three_prime, 1);
351        assert_eq!(metrics.bases_clipped_three_prime, 3);
352        assert_eq!(metrics.reads_clipped_post, 1);
353        assert_eq!(metrics.bases_clipped_post, 3);
354    }
355
356    #[test]
357    fn test_clipping_metrics_update_overlapping() {
358        let mut metrics = ClippingMetrics::new(ReadType::ReadOne);
359        let record = create_test_record("80M", false);
360
361        metrics.update(&record, ClipCounts { overlapping: 20, ..ClipCounts::default() });
362
363        assert_eq!(metrics.reads_clipped_overlapping, 1);
364        assert_eq!(metrics.bases_clipped_overlapping, 20);
365        assert_eq!(metrics.reads_clipped_post, 1);
366        assert_eq!(metrics.bases_clipped_post, 20);
367    }
368
369    #[test]
370    fn test_clipping_metrics_update_extending() {
371        let mut metrics = ClippingMetrics::new(ReadType::ReadOne);
372        let record = create_test_record("85M", false);
373
374        metrics.update(&record, ClipCounts { extending: 15, ..ClipCounts::default() });
375
376        assert_eq!(metrics.reads_clipped_extending, 1);
377        assert_eq!(metrics.bases_clipped_extending, 15);
378        assert_eq!(metrics.reads_clipped_post, 1);
379        assert_eq!(metrics.bases_clipped_post, 15);
380    }
381
382    #[test]
383    fn test_clipping_metrics_update_all_types() {
384        let mut metrics = ClippingMetrics::new(ReadType::ReadOne);
385        let record = create_test_record("50M", false);
386
387        metrics.update(
388            &record,
389            ClipCounts { prior: 10, five_prime: 5, three_prime: 3, overlapping: 20, extending: 12 },
390        );
391
392        assert_eq!(metrics.reads, 1);
393        assert_eq!(metrics.bases_clipped_pre, 10);
394        assert_eq!(metrics.bases_clipped_five_prime, 5);
395        assert_eq!(metrics.bases_clipped_three_prime, 3);
396        assert_eq!(metrics.bases_clipped_overlapping, 20);
397        assert_eq!(metrics.bases_clipped_extending, 12);
398        assert_eq!(metrics.bases_clipped_post, 50); // 10 + 5 + 3 + 20 + 12
399    }
400
401    #[test]
402    fn test_clipping_metrics_update_unmapped() {
403        let mut metrics = ClippingMetrics::new(ReadType::ReadOne);
404        let record = create_test_record("", true);
405
406        metrics.update(&record, ClipCounts { five_prime: 100, ..ClipCounts::default() });
407
408        assert_eq!(metrics.reads_unmapped, 1);
409    }
410
411    #[test]
412    fn test_clipping_metrics_update_multiple_records() {
413        let mut metrics = ClippingMetrics::new(ReadType::ReadOne);
414
415        for _ in 0..5 {
416            let record = create_test_record("90M", false);
417            metrics.update(&record, ClipCounts { five_prime: 10, ..ClipCounts::default() });
418        }
419
420        assert_eq!(metrics.reads, 5);
421        assert_eq!(metrics.bases, 450);
422        assert_eq!(metrics.reads_clipped_five_prime, 5);
423        assert_eq!(metrics.bases_clipped_five_prime, 50);
424    }
425
426    #[test]
427    fn test_clipping_metrics_add() {
428        let mut metrics1 = ClippingMetrics::new(ReadType::ReadOne);
429        let record1 = create_test_record("90M", false);
430        metrics1.update(&record1, ClipCounts { five_prime: 10, ..ClipCounts::default() });
431
432        let mut metrics2 = ClippingMetrics::new(ReadType::ReadOne);
433        let record2 = create_test_record("85M", false);
434        metrics2.update(&record2, ClipCounts { three_prime: 15, ..ClipCounts::default() });
435
436        metrics1.add(&metrics2);
437
438        assert_eq!(metrics1.reads, 2);
439        assert_eq!(metrics1.bases, 175);
440        assert_eq!(metrics1.reads_clipped_five_prime, 1);
441        assert_eq!(metrics1.reads_clipped_three_prime, 1);
442        assert_eq!(metrics1.bases_clipped_five_prime, 10);
443        assert_eq!(metrics1.bases_clipped_three_prime, 15);
444    }
445
446    #[test]
447    fn test_clipping_metrics_collection_new() {
448        let collection = ClippingMetricsCollection::new();
449
450        assert_eq!(collection.fragment.reads, 0);
451        assert_eq!(collection.read_one.reads, 0);
452        assert_eq!(collection.read_two.reads, 0);
453        assert_eq!(collection.pair.reads, 0);
454        assert_eq!(collection.all.reads, 0);
455    }
456
457    #[test]
458    fn test_clipping_metrics_collection_default() {
459        let collection = ClippingMetricsCollection::default();
460        assert_eq!(collection.fragment.reads, 0);
461    }
462
463    #[test]
464    fn test_clipping_metrics_collection_finalize() {
465        let mut collection = ClippingMetricsCollection::new();
466
467        let record = create_test_record("90M", false);
468        collection.read_one.update(&record, ClipCounts { five_prime: 10, ..ClipCounts::default() });
469        collection
470            .read_two
471            .update(&record, ClipCounts { three_prime: 10, ..ClipCounts::default() });
472        collection.fragment.update(&record, ClipCounts { five_prime: 5, ..ClipCounts::default() });
473
474        collection.finalize();
475
476        // Pair should have ReadOne + ReadTwo
477        assert_eq!(collection.pair.reads, 2);
478        assert_eq!(collection.pair.bases_clipped_five_prime, 10);
479        assert_eq!(collection.pair.bases_clipped_three_prime, 10);
480
481        // All should have Fragment + Pair
482        assert_eq!(collection.all.reads, 3);
483        assert_eq!(collection.all.bases_clipped_five_prime, 15);
484    }
485
486    #[test]
487    fn test_clipping_metrics_collection_all_metrics() {
488        let collection = ClippingMetricsCollection::new();
489        let all_metrics = collection.all_metrics();
490
491        assert_eq!(all_metrics.len(), 5);
492        assert_eq!(all_metrics[0].read_type, ReadType::Fragment);
493        assert_eq!(all_metrics[1].read_type, ReadType::ReadOne);
494        assert_eq!(all_metrics[2].read_type, ReadType::ReadTwo);
495        assert_eq!(all_metrics[3].read_type, ReadType::Pair);
496        assert_eq!(all_metrics[4].read_type, ReadType::All);
497    }
498
499    #[test]
500    fn test_clipping_metrics_cigar_with_deletions() {
501        let mut metrics = ClippingMetrics::new(ReadType::ReadOne);
502        let record = create_test_record("40M10D40M", false);
503
504        metrics.update(&record, ClipCounts::default());
505
506        // Only matches count as aligned bases
507        assert_eq!(metrics.bases, 80);
508    }
509
510    #[test]
511    fn test_clipping_metrics_cigar_with_insertions() {
512        let mut metrics = ClippingMetrics::new(ReadType::ReadOne);
513        let record = create_test_record("45M5I45M", false);
514
515        metrics.update(&record, ClipCounts::default());
516
517        // Only matches count as aligned bases
518        assert_eq!(metrics.bases, 90);
519    }
520
521    #[test]
522    fn test_clipping_metrics_update_unmapped_no_additional_clipping() {
523        let mut metrics = ClippingMetrics::new(ReadType::ReadOne);
524        let record = create_test_record("", true);
525
526        // No additional clipping, so shouldn't count as unmapped
527        metrics.update(&record, ClipCounts { prior: 10, ..ClipCounts::default() });
528
529        assert_eq!(metrics.reads_unmapped, 0);
530    }
531
532    #[test]
533    fn test_read_type_serialization() {
534        // Test that ReadType can be serialized/deserialized
535        let rt = ReadType::Fragment;
536        assert_eq!(rt, ReadType::Fragment);
537    }
538
539    #[test]
540    fn test_clipping_metrics_add_empty() {
541        let mut metrics1 = ClippingMetrics::new(ReadType::ReadOne);
542        let metrics2 = ClippingMetrics::new(ReadType::ReadOne);
543
544        let record = create_test_record("90M", false);
545        metrics1.update(&record, ClipCounts { five_prime: 10, ..ClipCounts::default() });
546
547        metrics1.add(&metrics2);
548
549        // Should still have just the one record's data
550        assert_eq!(metrics1.reads, 1);
551    }
552
553    #[test]
554    fn test_clipping_metrics_add_assign() {
555        let mut metrics1 = ClippingMetrics::new(ReadType::ReadOne);
556        let record1 = create_test_record("90M", false);
557        metrics1.update(&record1, ClipCounts { five_prime: 10, ..ClipCounts::default() });
558
559        let mut metrics2 = ClippingMetrics::new(ReadType::ReadOne);
560        let record2 = create_test_record("85M", false);
561        metrics2.update(&record2, ClipCounts { three_prime: 15, ..ClipCounts::default() });
562
563        metrics1 += &metrics2;
564
565        assert_eq!(metrics1.reads, 2);
566        assert_eq!(metrics1.bases, 175);
567        assert_eq!(metrics1.reads_clipped_five_prime, 1);
568        assert_eq!(metrics1.reads_clipped_three_prime, 1);
569    }
570
571    #[test]
572    fn test_metric_trait_impl() {
573        assert_eq!(ClippingMetrics::metric_name(), "clipping");
574    }
575}