holodeck_lib/error_model/
illumina.rs1use super::{ErrorModel, ReadEnd};
2
3const DECAY_START_FRACTION: f64 = 0.7;
5
6const DEFAULT_R2_MULTIPLIER: f64 = 1.5;
8
9#[derive(Debug, Clone, Copy)]
11struct CycleParams {
12 p_err: f64,
14 base_qual: u8,
16}
17
18pub struct IlluminaErrorModel {
33 read_length: usize,
35 min_error_rate: f64,
37 max_error_rate: f64,
39 r2_multiplier: f64,
41 decay_start: usize,
43 r1_params: Vec<CycleParams>,
45 r2_params: Vec<CycleParams>,
47}
48
49impl IlluminaErrorModel {
50 #[must_use]
60 pub fn new(read_length: usize, min_error_rate: f64, max_error_rate: f64) -> Self {
61 Self::with_r2_multiplier(read_length, min_error_rate, max_error_rate, DEFAULT_R2_MULTIPLIER)
62 }
63
64 #[must_use]
72 pub fn with_r2_multiplier(
73 read_length: usize,
74 min_error_rate: f64,
75 max_error_rate: f64,
76 r2_multiplier: f64,
77 ) -> Self {
78 assert!(read_length > 0, "read_length must be > 0");
79 assert!(
80 min_error_rate <= max_error_rate,
81 "min_error_rate ({min_error_rate}) must be <= max_error_rate ({max_error_rate})"
82 );
83
84 #[expect(clippy::cast_possible_truncation, reason = "product is bounded by read_length")]
85 #[expect(clippy::cast_sign_loss, reason = "product is non-negative")]
86 let decay_start = (read_length as f64 * DECAY_START_FRACTION).round() as usize;
87
88 let mut model = Self {
89 read_length,
90 min_error_rate,
91 max_error_rate,
92 r2_multiplier,
93 decay_start,
94 r1_params: Vec::new(),
95 r2_params: Vec::new(),
96 };
97
98 model.r1_params =
100 (0..read_length).map(|c| model.compute_cycle_params(c, ReadEnd::Read1)).collect();
101 model.r2_params =
102 (0..read_length).map(|c| model.compute_cycle_params(c, ReadEnd::Read2)).collect();
103
104 model
105 }
106}
107
108impl IlluminaErrorModel {
109 fn compute_error_probability(&self, cycle: usize, read_end: ReadEnd) -> f64 {
111 let base_rate = if cycle < self.decay_start {
112 self.min_error_rate
113 } else {
114 let ramp_len = self.read_length.saturating_sub(self.decay_start).max(1);
115 let progress = (cycle - self.decay_start) as f64 / ramp_len as f64;
116 self.min_error_rate + (self.max_error_rate - self.min_error_rate) * progress.min(1.0)
117 };
118
119 match read_end {
120 ReadEnd::Read1 => base_rate,
121 ReadEnd::Read2 => (base_rate * self.r2_multiplier).min(1.0),
122 }
123 }
124
125 fn compute_cycle_params(&self, cycle: usize, read_end: ReadEnd) -> CycleParams {
127 let p_err = self.compute_error_probability(cycle, read_end);
128
129 #[expect(clippy::cast_possible_truncation, reason = "clamped to [2, 41]")]
130 #[expect(clippy::cast_sign_loss, reason = "clamped to [2, 41]")]
131 let base_qual = if p_err > 0.0 {
132 (-10.0 * p_err.log10()).round().clamp(2.0, 41.0) as u8 + 33
133 } else {
134 41 + 33
135 };
136
137 CycleParams { p_err, base_qual }
138 }
139
140 #[inline]
142 fn cycle_params(&self, cycle: usize, read_end: ReadEnd) -> CycleParams {
143 match read_end {
144 ReadEnd::Read1 => self.r1_params[cycle],
145 ReadEnd::Read2 => self.r2_params[cycle],
146 }
147 }
148}
149
150impl ErrorModel for IlluminaErrorModel {
151 fn error_probability(&self, cycle: usize, read_end: ReadEnd) -> f64 {
152 self.cycle_params(cycle, read_end).p_err
153 }
154
155 fn base_quality_phred33(&self, cycle: usize, read_end: ReadEnd) -> u8 {
156 self.cycle_params(cycle, read_end).base_qual
157 }
158}
159
160#[cfg(test)]
161mod tests {
162 use super::*;
163
164 #[test]
165 fn test_error_rate_at_start() {
166 let model = IlluminaErrorModel::new(150, 0.001, 0.01);
167 let p = model.error_probability(0, ReadEnd::Read1);
168 assert!((p - 0.001).abs() < 1e-10);
169 }
170
171 #[test]
172 fn test_error_rate_flat_before_decay() {
173 let model = IlluminaErrorModel::new(150, 0.001, 0.01);
174 for cycle in 0..100 {
176 let p = model.error_probability(cycle, ReadEnd::Read1);
177 assert!((p - 0.001).abs() < 1e-10, "cycle {cycle}: expected 0.001, got {p}");
178 }
179 }
180
181 #[test]
182 fn test_error_rate_ramps_to_max() {
183 let model = IlluminaErrorModel::new(150, 0.001, 0.01);
184 let p = model.error_probability(149, ReadEnd::Read1);
185 assert!((p - 0.01).abs() < 0.001, "expected ~0.01 at last cycle, got {p}");
186 }
187
188 #[test]
189 fn test_error_rate_increases_monotonically() {
190 let model = IlluminaErrorModel::new(150, 0.001, 0.01);
191 let mut prev = 0.0;
192 for cycle in 0..150 {
193 let p = model.error_probability(cycle, ReadEnd::Read1);
194 assert!(p >= prev, "cycle {cycle}: {p} < {prev}");
195 prev = p;
196 }
197 }
198
199 #[test]
200 fn test_r2_higher_than_r1() {
201 let model = IlluminaErrorModel::new(150, 0.001, 0.01);
202 for cycle in [0, 50, 100, 149] {
203 let p1 = model.error_probability(cycle, ReadEnd::Read1);
204 let p2 = model.error_probability(cycle, ReadEnd::Read2);
205 assert!(p2 > p1, "cycle {cycle}: R2 ({p2}) should be > R1 ({p1})");
206 }
207 }
208
209 #[test]
210 fn test_r2_multiplier() {
211 let model = IlluminaErrorModel::new(150, 0.001, 0.01);
212 let p1 = model.error_probability(0, ReadEnd::Read1);
213 let p2 = model.error_probability(0, ReadEnd::Read2);
214 assert!((p2 - p1 * 1.5).abs() < 1e-10, "R2 should be 1.5x R1 at start");
215 }
216
217 #[test]
218 fn test_apply_errors_reproducible() {
219 use rand::SeedableRng;
220 use rand::rngs::SmallRng;
221
222 let model = IlluminaErrorModel::new(10, 0.1, 0.3);
223 let mut bases1 = b"ACGTACGTAC".to_vec();
224 let mut bases2 = b"ACGTACGTAC".to_vec();
225
226 let mut rng1 = SmallRng::seed_from_u64(42);
227 let mut rng2 = SmallRng::seed_from_u64(42);
228
229 let (n1, q1) =
230 crate::error_model::apply_errors(&model, &mut bases1, ReadEnd::Read1, &mut rng1);
231 let (n2, q2) =
232 crate::error_model::apply_errors(&model, &mut bases2, ReadEnd::Read1, &mut rng2);
233
234 assert_eq!(bases1, bases2);
235 assert_eq!(q1, q2);
236 assert_eq!(n1, n2);
237 }
238
239 #[test]
240 fn test_apply_errors_introduces_errors() {
241 use rand::SeedableRng;
242 use rand::rngs::SmallRng;
243
244 let model = IlluminaErrorModel::new(100, 0.5, 0.5); let original = vec![b'A'; 100];
246 let mut bases = original.clone();
247 let mut rng = SmallRng::seed_from_u64(123);
248
249 let (n_errors, qualities) =
250 crate::error_model::apply_errors(&model, &mut bases, ReadEnd::Read1, &mut rng);
251
252 assert!(n_errors > 10, "expected many errors with 50% rate, got {n_errors}");
253 assert_eq!(qualities.len(), 100);
254 assert_ne!(bases, original);
256 }
257
258 #[test]
259 fn test_apply_errors_quality_encoding() {
260 use rand::SeedableRng;
261 use rand::rngs::SmallRng;
262
263 let model = IlluminaErrorModel::new(10, 0.001, 0.01);
264 let mut bases = b"ACGTACGTAC".to_vec();
265 let mut rng = SmallRng::seed_from_u64(99);
266
267 let (_, qualities) =
268 crate::error_model::apply_errors(&model, &mut bases, ReadEnd::Read1, &mut rng);
269
270 for &q in &qualities {
271 assert!(q >= 35, "quality {q} below minimum Phred+33 (35)");
273 assert!(q <= 74, "quality {q} above maximum Phred+33 (74)");
274 }
275 }
276
277 #[test]
278 fn test_apply_errors_zero_rate() {
279 use rand::SeedableRng;
280 use rand::rngs::SmallRng;
281
282 let model = IlluminaErrorModel::new(10, 0.0, 0.0);
283 let original = b"ACGTACGTAC".to_vec();
284 let mut bases = original.clone();
285 let mut rng = SmallRng::seed_from_u64(42);
286
287 let (n_errors, qualities) =
288 crate::error_model::apply_errors(&model, &mut bases, ReadEnd::Read1, &mut rng);
289
290 assert_eq!(n_errors, 0, "zero error rate should produce no errors");
291 assert_eq!(bases, original, "bases should be unchanged");
292 assert_eq!(qualities.len(), 10);
293 }
294
295 #[test]
296 #[should_panic(expected = "read_length must be > 0")]
297 fn test_zero_read_length_panics() {
298 let _ = IlluminaErrorModel::new(0, 0.001, 0.01);
299 }
300
301 #[test]
302 #[should_panic(expected = "min_error_rate")]
303 fn test_min_gt_max_panics() {
304 let _ = IlluminaErrorModel::new(150, 0.1, 0.01);
305 }
306}