Skip to main content

fastqc_rust/utils/
phred.rs

//! Phred quality encoding detection.
//!
//! Replicates the logic from `Sequence/QualityEncoding/PhredEncoding.java`.
4
/// A Phred quality encoding: a display name paired with its ASCII offset.
///
/// The type is a small plain-data value (a `&'static str` and a `u8`), so it
/// is `Copy` and can be compared directly with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PhredEncoding {
    /// Human-readable name of the encoding, e.g. `"Sanger / Illumina 1.9"`.
    pub name: &'static str,
    /// ASCII offset of the encoding (33 or 64 for the encodings this module
    /// produces).
    pub offset: u8,
}
10
// Both values match the Java SANGER_ENCODING_OFFSET and
// ILLUMINA_1_3_ENCODING_OFFSET fields exactly.

/// Offset used by the Sanger / Illumina 1.9 encoding: ASCII 33 (`!`).
const SANGER_ENCODING_OFFSET: u8 = b'!';
/// Offset used by the Illumina 1.3 and 1.5 encodings: ASCII 64 (`@`).
const ILLUMINA_1_3_ENCODING_OFFSET: u8 = b'@';
15
16/// Detect the Phred encoding from the lowest ASCII character seen in quality strings.
17///
18/// Returns the encoding name and offset, or an error if the character is out of range.
19///
20/// Replicates `PhredEncoding.getFastQEncodingOffset(char)` exactly,
21/// including the boundary conditions at 33, 64, 65, and 126.
22pub fn detect(lowest_char: u8) -> Result<PhredEncoding, String> {
23    if lowest_char < 33 {
24        // Java error message format preserved
25        Err(format!(
26            "No known encodings with chars < 33 (Yours was '{}' with value {})",
27            lowest_char as char, lowest_char
28        ))
29    } else if lowest_char < 64 {
30        Ok(PhredEncoding {
31            name: "Sanger / Illumina 1.9",
32            offset: SANGER_ENCODING_OFFSET,
33        })
34    } else if lowest_char == ILLUMINA_1_3_ENCODING_OFFSET + 1 {
35        // Java checks `== 65` (offset 64 + 1) specifically for Illumina 1.3,
36        // which allowed quality value 1 (ASCII 65). From v1.5 onward the minimum was 2.
37        Ok(PhredEncoding {
38            name: "Illumina 1.3",
39            offset: ILLUMINA_1_3_ENCODING_OFFSET,
40        })
41    } else if lowest_char <= 126 {
42        Ok(PhredEncoding {
43            name: "Illumina 1.5",
44            offset: ILLUMINA_1_3_ENCODING_OFFSET,
45        })
46    } else {
47        // Java error message format preserved
48        Err(format!(
49            "No known encodings with chars > 126 (Yours was {} with value {})",
50            lowest_char as char, lowest_char
51        ))
52    }
53}
54
/// Unit tests for `detect`, covering each encoding and every boundary value
/// (32/33, 63/64, 65, 126/127).
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `detect(lowest_char)` succeeds with the expected name and offset.
    fn assert_detects(lowest_char: u8, expected_name: &str, expected_offset: u8) {
        let enc = detect(lowest_char).unwrap();
        assert_eq!(enc.name, expected_name);
        assert_eq!(enc.offset, expected_offset);
    }

    #[test]
    fn test_sanger_encoding() {
        assert_detects(b'!', "Sanger / Illumina 1.9", 33); // ASCII 33
    }

    #[test]
    fn test_sanger_high_boundary() {
        assert_detects(63, "Sanger / Illumina 1.9", 33); // just below 64
    }

    #[test]
    fn test_illumina_1_5_at_64() {
        // 64 is the first offset-64 value; only exactly 65 maps to Illumina 1.3.
        assert_detects(b'@', "Illumina 1.5", 64); // ASCII 64
    }

    #[test]
    fn test_illumina_1_3() {
        assert_detects(65, "Illumina 1.3", 64); // exactly 65
    }

    #[test]
    fn test_illumina_1_5() {
        assert_detects(66, "Illumina 1.5", 64);
    }

    #[test]
    fn test_illumina_1_5_at_126() {
        assert_detects(126, "Illumina 1.5", 64);
    }

    #[test]
    fn test_error_below_33() {
        let err = detect(20).unwrap_err();
        assert!(err.contains("< 33"));
    }

    #[test]
    fn test_error_at_32() {
        // 32 is the highest rejected value below the Sanger range.
        let err = detect(32).unwrap_err();
        assert_eq!(
            err,
            "No known encodings with chars < 33 (Yours was ' ' with value 32)"
        );
    }

    #[test]
    fn test_error_above_126() {
        let err = detect(127).unwrap_err();
        assert!(err.contains("> 126"));
    }

    #[test]
    fn test_error_at_127_message() {
        // The "> 126" message prints the character without surrounding quotes.
        let err = detect(127).unwrap_err();
        assert_eq!(
            err,
            "No known encodings with chars > 126 (Yours was \u{7f} with value 127)"
        );
    }

    #[test]
    fn test_error_at_u8_max() {
        let err = detect(u8::MAX).unwrap_err();
        assert!(err.contains("> 126"));
        assert!(err.contains("with value 255"));
    }
}