Skip to main content

cyanea_seq/
quality.rs

1//! Phred quality scores for sequencing reads.
2//!
3//! [`QualityScores`] stores decoded Phred quality values (not raw ASCII).
4//! Supports both Phred+33 (Illumina 1.8+) and Phred+64 (older Illumina)
5//! encodings via [`PhredEncoding`].
6
7use cyanea_core::{CyaneaError, Result, Scored};
8
9/// Quality score encoding scheme.
10#[derive(Debug, Clone, Copy, PartialEq, Eq)]
11#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
12pub enum PhredEncoding {
13    /// Phred+33 (Sanger / Illumina 1.8+). Most common modern encoding.
14    Phred33,
15    /// Phred+64 (Illumina 1.3–1.7).
16    Phred64,
17}
18
19impl PhredEncoding {
20    fn offset(self) -> u8 {
21        match self {
22            PhredEncoding::Phred33 => 33,
23            PhredEncoding::Phred64 => 64,
24        }
25    }
26}
27
28/// Decoded Phred quality scores.
29///
30/// Stores raw Phred values (typically 0–41), not ASCII-encoded bytes.
31#[derive(Debug, Clone, PartialEq, Eq)]
32#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
33pub struct QualityScores {
34    scores: Vec<u8>,
35}
36
37impl QualityScores {
38    /// Create from raw Phred quality values (already decoded).
39    pub fn from_raw(scores: Vec<u8>) -> Self {
40        Self { scores }
41    }
42
43    /// Decode from ASCII-encoded quality bytes.
44    pub fn from_ascii(ascii: &[u8], encoding: PhredEncoding) -> Result<Self> {
45        let offset = encoding.offset();
46        let mut scores = Vec::with_capacity(ascii.len());
47        for (i, &b) in ascii.iter().enumerate() {
48            if b < offset {
49                return Err(CyaneaError::InvalidInput(format!(
50                    "quality byte {} (0x{:02X}) at position {} is below {:?} offset {}",
51                    b as char, b, i, encoding, offset
52                )));
53            }
54            scores.push(b - offset);
55        }
56        Ok(Self { scores })
57    }
58
59    /// Encode back to ASCII using the given encoding.
60    pub fn to_ascii(&self, encoding: PhredEncoding) -> Vec<u8> {
61        let offset = encoding.offset();
62        self.scores.iter().map(|&q| q + offset).collect()
63    }
64
65    /// Number of quality scores.
66    pub fn len(&self) -> usize {
67        self.scores.len()
68    }
69
70    /// Whether the quality scores are empty.
71    pub fn is_empty(&self) -> bool {
72        self.scores.is_empty()
73    }
74
75    /// Raw quality scores as a slice.
76    pub fn as_slice(&self) -> &[u8] {
77        &self.scores
78    }
79
80    /// Mean quality score.
81    pub fn mean(&self) -> f64 {
82        if self.scores.is_empty() {
83            return 0.0;
84        }
85        let sum: u64 = self.scores.iter().map(|&q| q as u64).sum();
86        sum as f64 / self.scores.len() as f64
87    }
88
89    /// Minimum quality score.
90    pub fn min(&self) -> Option<u8> {
91        self.scores.iter().copied().min()
92    }
93
94    /// Maximum quality score.
95    pub fn max(&self) -> Option<u8> {
96        self.scores.iter().copied().max()
97    }
98
99    /// Fraction of scores at or above the given threshold.
100    pub fn fraction_above(&self, threshold: u8) -> f64 {
101        if self.scores.is_empty() {
102            return 0.0;
103        }
104        let count = self.scores.iter().filter(|&&q| q >= threshold).count();
105        count as f64 / self.scores.len() as f64
106    }
107
108    /// Error probability for a single Phred quality score: 10^(-Q/10).
109    pub fn error_probability(phred: u8) -> f64 {
110        10.0_f64.powf(-(phred as f64) / 10.0)
111    }
112}
113
114impl Scored for QualityScores {
115    fn score(&self) -> f64 {
116        self.mean()
117    }
118}
119
120#[cfg(test)]
121mod tests {
122    use super::*;
123
124    #[test]
125    fn from_ascii_phred33() {
126        // '!' = 33 → Q0, 'I' = 73 → Q40
127        let ascii = b"!I";
128        let q = QualityScores::from_ascii(ascii, PhredEncoding::Phred33).unwrap();
129        assert_eq!(q.as_slice(), &[0, 40]);
130    }
131
132    #[test]
133    fn roundtrip_ascii() {
134        let original = vec![0, 10, 20, 30, 40];
135        let q = QualityScores::from_raw(original.clone());
136        let ascii = q.to_ascii(PhredEncoding::Phred33);
137        let q2 = QualityScores::from_ascii(&ascii, PhredEncoding::Phred33).unwrap();
138        assert_eq!(q2.as_slice(), &original);
139    }
140
141    #[test]
142    fn stats() {
143        let q = QualityScores::from_raw(vec![10, 20, 30, 40]);
144        assert!((q.mean() - 25.0).abs() < 1e-10);
145        assert_eq!(q.min(), Some(10));
146        assert_eq!(q.max(), Some(40));
147        assert!((q.fraction_above(20) - 0.75).abs() < 1e-10);
148        assert!((q.fraction_above(30) - 0.5).abs() < 1e-10);
149    }
150
151    #[test]
152    fn error_probability() {
153        let p = QualityScores::error_probability(10);
154        assert!((p - 0.1).abs() < 1e-10);
155        let p = QualityScores::error_probability(20);
156        assert!((p - 0.01).abs() < 1e-10);
157    }
158
159    #[test]
160    fn scored_trait() {
161        let q = QualityScores::from_raw(vec![20, 30]);
162        assert!((q.score() - 25.0).abs() < 1e-10);
163    }
164}