Skip to main content

fgumi_metrics/
rejection.rs

//! Rejection reason tracking for reads and templates.
//!
//! This module provides rejection reason types for tracking why reads or templates
//! are rejected during processing, enabling detailed metrics and debugging.
5
6use serde::{Deserialize, Serialize};
7use std::fmt;
8
9/// Reasons why a read or template was rejected during processing.
10///
11/// Each variant represents a specific reason for rejection, allowing for
12/// detailed tracking and reporting of why data was filtered out.
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
14pub enum RejectionReason {
15    /// Insufficient reads to generate a consensus
16    InsufficientSupport,
17    /// Read has a different, and minority, set of indels
18    MinorityAlignment,
19    /// Too few reads agreed on the strand orientation
20    InsufficientStrandSupport,
21    /// Base quality scores were below threshold
22    LowBaseQuality,
23    /// Read group had too many N bases
24    ExcessiveNBases,
25    /// Template had no valid alignments
26    NoValidAlignment,
27    /// Reads failed mapping quality threshold
28    LowMappingQuality,
29    /// UMI contained N bases
30    NBasesInUmi,
31    /// Read lacks required UMI tag
32    MissingUmi,
33    /// Reads were marked as not passing filter (PF flag)
34    NotPassingFilter,
35    /// Consensus read had too low mean quality
36    LowMeanQuality,
37    /// Consensus read had insufficient minimum depth
38    InsufficientMinDepth,
39    /// Consensus read had excessive error rate
40    ExcessiveErrorRate,
41    /// UMI was too short
42    UmiTooShort,
43    /// Template had reads on same strand only (no proper pair)
44    SameStrandOnly,
45    /// Duplicate UMI at same genomic position
46    DuplicateUmi,
47    /// Only one of R1 or R2 consensus was generated (orphan)
48    OrphanConsensus,
49    /// Read had zero bases after quality trimming
50    ZeroBasesPostTrimming,
51}
52
53impl RejectionReason {
54    /// Returns a human-readable description.
55    #[must_use]
56    pub fn description(&self) -> &'static str {
57        match self {
58            Self::InsufficientSupport => "Insufficient reads to generate a consensus",
59            Self::MinorityAlignment => "Read has a different, and minority, set of indels",
60            Self::InsufficientStrandSupport => "Too few reads agreed on the strand orientation",
61            Self::LowBaseQuality => "Base quality scores were below threshold",
62            Self::ExcessiveNBases => "Read group had too many N bases",
63            Self::NoValidAlignment => "Template had no valid alignments",
64            Self::LowMappingQuality => "Reads failed mapping quality threshold",
65            Self::NBasesInUmi => "UMI contained N bases",
66            Self::MissingUmi => "Read lacks required UMI tag",
67            Self::NotPassingFilter => "Reads were marked as not passing filter",
68            Self::LowMeanQuality => "Consensus read had too low mean quality",
69            Self::InsufficientMinDepth => "Consensus read had insufficient minimum depth",
70            Self::ExcessiveErrorRate => "Consensus read had excessive error rate",
71            Self::UmiTooShort => "UMI was too short",
72            Self::SameStrandOnly => "Template had reads on same strand only",
73            Self::DuplicateUmi => "Duplicate UMI at same genomic position",
74            Self::OrphanConsensus => "Only one of R1 or R2 consensus generated",
75            Self::ZeroBasesPostTrimming => "Read or mate had zero bases post trimming",
76        }
77    }
78
79    /// Returns the TSV metric key for this rejection reason.
80    #[must_use]
81    pub fn tsv_key(&self) -> &'static str {
82        match self {
83            Self::InsufficientSupport => "raw_reads_rejected_for_insufficient_support",
84            Self::MinorityAlignment => "raw_reads_rejected_for_minority_alignment",
85            Self::InsufficientStrandSupport => "raw_reads_rejected_for_insufficient_strand_support",
86            Self::LowBaseQuality => "raw_reads_rejected_for_low_base_quality",
87            Self::ExcessiveNBases => "raw_reads_rejected_for_excessive_n_bases",
88            Self::NoValidAlignment => "raw_reads_rejected_for_no_valid_alignment",
89            Self::LowMappingQuality => "raw_reads_rejected_for_low_mapping_quality",
90            Self::NBasesInUmi => "raw_reads_rejected_for_n_bases_in_umi",
91            Self::MissingUmi => "raw_reads_rejected_for_missing_umi",
92            Self::NotPassingFilter => "raw_reads_rejected_for_not_passing_filter",
93            Self::LowMeanQuality => "raw_reads_rejected_for_low_mean_quality",
94            Self::InsufficientMinDepth => "raw_reads_rejected_for_insufficient_min_depth",
95            Self::ExcessiveErrorRate => "raw_reads_rejected_for_excessive_error_rate",
96            Self::UmiTooShort => "raw_reads_rejected_for_umi_too_short",
97            Self::SameStrandOnly => "raw_reads_rejected_for_single_strand_only",
98            Self::DuplicateUmi => "raw_reads_rejected_for_duplicate_umi",
99            Self::OrphanConsensus => "raw_reads_rejected_for_orphan_consensus",
100            Self::ZeroBasesPostTrimming => "raw_reads_rejected_for_zero_bases_post_trimming",
101        }
102    }
103
104    /// Returns a short description for key-value metrics output.
105    #[must_use]
106    pub fn kv_description(&self) -> &'static str {
107        match self {
108            Self::InsufficientSupport => "Insufficient reads to generate a consensus",
109            Self::MinorityAlignment => "Read has a different, and minority, set of indels",
110            Self::InsufficientStrandSupport => "Insufficient strand support for consensus",
111            Self::LowBaseQuality => "Low base quality",
112            Self::ExcessiveNBases => "Excessive N bases in read",
113            Self::NoValidAlignment => "No valid alignment found",
114            Self::LowMappingQuality => "Low mapping quality",
115            Self::NBasesInUmi => "N bases in UMI sequence",
116            Self::MissingUmi => "Read lacks required UMI tag",
117            Self::NotPassingFilter => "Read did not pass vendor filter",
118            Self::LowMeanQuality => "Low mean base quality",
119            Self::InsufficientMinDepth => "Insufficient minimum read depth",
120            Self::ExcessiveErrorRate => "Excessive error rate",
121            Self::UmiTooShort => "UMI sequence too short",
122            Self::SameStrandOnly => "Only generating one strand of duplex consensus",
123            Self::DuplicateUmi => "Duplicate UMI detected",
124            Self::OrphanConsensus => "Only one of R1 or R2 consensus generated",
125            Self::ZeroBasesPostTrimming => "Read or mate had zero bases post trimming",
126        }
127    }
128}
129
130impl fmt::Display for RejectionReason {
131    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
132        write!(f, "{}", self.description())
133    }
134}
135
/// Formats a count with thousands separators.
///
/// The decimal digits of `n` are grouped in threes from the right and the groups are
/// joined with `,`. Values below 1,000 are returned without any separator.
///
/// # Examples
///
/// ```
/// use fgumi_metrics::rejection::format_count;
///
/// assert_eq!(format_count(1234567), "1,234,567");
/// assert_eq!(format_count(123), "123");
/// ```
#[must_use]
pub fn format_count(n: u64) -> String {
    let digits = n.to_string();

    // Length of the leading group: the 1-3 digits left over once the rest of the number
    // has been split into complete groups of three. `digits` is never empty, so a
    // remainder of zero means the leading group is itself a full group of three.
    let lead_len = match digits.len() % 3 {
        0 => 3,
        partial => partial,
    };
    let (lead, rest) = digits.split_at(lead_len);

    // `rest` is a whole number of three-digit groups, each preceded by one separator.
    let mut formatted = String::with_capacity(digits.len() + rest.len() / 3);
    formatted.push_str(lead);
    for start in (0..rest.len()).step_by(3) {
        formatted.push(',');
        formatted.push_str(&rest[start..start + 3]);
    }
    formatted
}
161
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Every variant of [`RejectionReason`], shared by the per-variant tests below.
    ///
    /// Keep this list in sync with the enum when variants are added or removed.
    const ALL_REASONS: [RejectionReason; 18] = [
        RejectionReason::InsufficientSupport,
        RejectionReason::MinorityAlignment,
        RejectionReason::InsufficientStrandSupport,
        RejectionReason::LowBaseQuality,
        RejectionReason::ExcessiveNBases,
        RejectionReason::NoValidAlignment,
        RejectionReason::LowMappingQuality,
        RejectionReason::NBasesInUmi,
        RejectionReason::MissingUmi,
        RejectionReason::NotPassingFilter,
        RejectionReason::LowMeanQuality,
        RejectionReason::InsufficientMinDepth,
        RejectionReason::ExcessiveErrorRate,
        RejectionReason::UmiTooShort,
        RejectionReason::SameStrandOnly,
        RejectionReason::DuplicateUmi,
        RejectionReason::OrphanConsensus,
        RejectionReason::ZeroBasesPostTrimming,
    ];

    /// Spot-checks `description()` and confirms `Display` renders the same text.
    #[test]
    fn test_rejection_reason_description() {
        assert!(RejectionReason::LowBaseQuality.description().contains("quality"));
        assert!(RejectionReason::InsufficientSupport.description().contains("Insufficient"));
        assert_eq!(
            RejectionReason::MinorityAlignment.to_string(),
            "Read has a different, and minority, set of indels"
        );
        for reason in &ALL_REASONS {
            assert_eq!(reason.to_string(), reason.description());
        }
    }

    /// Every TSV key must carry the shared `raw_reads_rejected_for_` prefix.
    #[test]
    fn test_tsv_key_prefix() {
        for reason in &ALL_REASONS {
            assert!(
                reason.tsv_key().starts_with("raw_reads_rejected_for_"),
                "tsv_key for {reason:?} does not have expected prefix: {}",
                reason.tsv_key()
            );
        }
    }

    /// No two variants may share a TSV key, otherwise their metrics would collide.
    #[test]
    fn test_tsv_key_unique() {
        let mut seen = HashSet::new();
        for reason in &ALL_REASONS {
            assert!(
                seen.insert(reason.tsv_key()),
                "tsv_key for {reason:?} is duplicated: {}",
                reason.tsv_key()
            );
        }
    }

    /// Every variant must provide a non-empty key-value description.
    #[test]
    fn test_kv_description_non_empty() {
        for reason in &ALL_REASONS {
            assert!(!reason.kv_description().is_empty(), "kv_description for {reason:?} is empty");
        }
    }

    /// Covers values without separators, group boundaries, and the largest `u64`.
    #[test]
    fn test_format_count() {
        assert_eq!(format_count(0), "0");
        assert_eq!(format_count(123), "123");
        assert_eq!(format_count(999), "999");
        assert_eq!(format_count(1000), "1,000");
        assert_eq!(format_count(1234), "1,234");
        assert_eq!(format_count(1_234_567), "1,234,567");
        assert_eq!(format_count(1_000_000_000), "1,000,000,000");
        assert_eq!(format_count(u64::MAX), "18,446,744,073,709,551,615");
    }
}