fgumi_consensus/phred.rs
1//! Phred score utilities and probability calculations.
2//!
3//! This module provides functions for working with Phred quality scores and converting
4//! between Phred scores and probabilities. All probability calculations are done in
5//! log-space (natural log) for numerical stability.
6//!
7//! The implementations in this module are designed to exactly match fgbio's
8//! NumericTypes.scala for consistent consensus calling results.
9//!
10//! Key references:
11//! - Equation (7) and (10) from <https://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf>
12
13use std::f64::consts::LN_10;
14
15/// Natural log of 2, used as threshold in log1mexp (Equation 7)
16const LN_TWO: f64 = std::f64::consts::LN_2; // ln(2) ≈ 0.693
17
18/// Natural log of 4/3, used in probabilityOfErrorTwoTrials
19const LN_FOUR_THIRDS: f64 = 0.287_682_072_451_780_9; // ln(4/3)
20
21/// Natural log of 1.0 (which is 0)
22pub const LN_ONE: f64 = 0.0;
23
24// Re-export shared constants from fgumi-dna
25pub use fgumi_dna::{MIN_PHRED, NO_CALL_BASE, NO_CALL_BASE_LOWER};
26
27/// Maximum Phred score we handle (Q93, matching `SAMUtils.MAX_PHRED_SCORE`)
28pub const MAX_PHRED: u8 = 93;
29
30/// Precision constant used in Phred score conversion (matching fgbio)
31const PHRED_PRECISION: f64 = 0.001;
32
33/// Precomputed `phred_to_ln_error_prob(MAX_PHRED)` used as a threshold in `ln_prob_to_phred`.
34const MAX_PHRED_AS_LN_ERROR: f64 = -(MAX_PHRED as f64) * LN_10 / 10.0;
35
36/// Phred score type
37pub type PhredScore = u8;
38
39/// Log probability type (natural log)
40pub type LogProbability = f64;
41
42/// Converts a Phred score to a log probability of error
43///
44/// Phred score Q relates to error probability P by: Q = -10 * log10(P)
45/// So: P = 10^(-Q/10)
46/// And: ln(P) = ln(10^(-Q/10)) = -Q * ln(10) / 10
47///
48/// # Examples
49/// ```
50/// use fgumi_lib::phred::phred_to_ln_error_prob;
51///
52/// // Q10 corresponds to 10% error rate
53/// let ln_error = phred_to_ln_error_prob(10);
54/// assert!((ln_error - 0.1_f64.ln()).abs() < 1e-10);
55///
56/// // Q20 corresponds to 1% error rate
57/// let ln_error = phred_to_ln_error_prob(20);
58/// assert!((ln_error - 0.01_f64.ln()).abs() < 1e-10);
59///
60/// // Q30 corresponds to 0.1% error rate
61/// let ln_error = phred_to_ln_error_prob(30);
62/// assert!((ln_error - 0.001_f64.ln()).abs() < 1e-10);
63/// ```
64#[inline]
65#[must_use]
66pub fn phred_to_ln_error_prob(phred: PhredScore) -> LogProbability {
67 -f64::from(phred) * LN_10 / 10.0
68}
69
70/// Converts a Phred score to a log probability of being correct (1 - error)
71///
72/// For numerical stability with high quality scores, we use:
73/// ln(1 - P) ≈ ln(1 - e^ln(P))
74///
75/// # Examples
76/// ```
77/// use fgumi_lib::phred::phred_to_ln_correct_prob;
78///
79/// // Q30 = 0.1% error, so 99.9% correct
80/// let ln_correct = phred_to_ln_correct_prob(30);
81/// assert!((ln_correct - 0.999_f64.ln()).abs() < 1e-6);
82///
83/// // Q20 = 1% error, so 99% correct
84/// let ln_correct = phred_to_ln_correct_prob(20);
85/// assert!((ln_correct - 0.99_f64.ln()).abs() < 1e-6);
86/// ```
87#[inline]
88#[must_use]
89pub fn phred_to_ln_correct_prob(phred: PhredScore) -> LogProbability {
90 let ln_error = phred_to_ln_error_prob(phred);
91 ln_one_minus_exp(ln_error)
92}
93
94/// Converts a log probability to a Phred score
95///
96/// Q = -10 * log10(P) = -10 * ln(P) / ln(10)
97///
98/// Uses floor(value + precision) matching fgbio's PhredScore.fromLogProbability.
99/// The result is clamped to the valid Phred score range [2, 93].
100///
101/// # Examples
102/// ```
103/// use fgumi_lib::phred::ln_prob_to_phred;
104///
105/// // 1% error (0.01) corresponds to Q20
106/// let phred = ln_prob_to_phred(0.01_f64.ln());
107/// assert_eq!(phred, 20);
108///
109/// // 0.1% error (0.001) corresponds to Q30
110/// let phred = ln_prob_to_phred(0.001_f64.ln());
111/// assert_eq!(phred, 30);
112///
113/// // Very low error probability is clamped to Q93
114/// let phred = ln_prob_to_phred(1e-20_f64.ln());
115/// assert_eq!(phred, 93);
116/// ```
117#[inline]
118#[must_use]
119pub fn ln_prob_to_phred(ln_prob: LogProbability) -> PhredScore {
120 // Match fgbio's PhredScore.fromLogProbability:
121 // if (lnProbError < MaxValueAsLogDouble) MaxValue
122 // else Math.floor(-10.0 * (lnProbError/ Ln10) + Precision).toByte
123 if ln_prob < MAX_PHRED_AS_LN_ERROR {
124 return MAX_PHRED;
125 }
126 let phred = (-10.0 * ln_prob / LN_10 + PHRED_PRECISION).floor();
127 #[expect(
128 clippy::cast_possible_truncation,
129 clippy::cast_sign_loss,
130 reason = "value is clamped to [MIN_PHRED, MAX_PHRED] which fits in u8"
131 )]
132 {
133 phred.clamp(f64::from(MIN_PHRED), f64::from(MAX_PHRED)) as PhredScore
134 }
135}
136
137/// Precise computation of log(1 + exp(x)).
138///
139/// Implements Equation (10) from <https://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf>
140/// Matches fgbio's `log1pexp` function exactly.
141///
142/// Thresholds from the paper and fgbio:
143/// - x <= -37: exp(x) is so small that log(1 + exp(x)) ≈ exp(x)
144/// - x <= 18: use log1p(exp(x)) for precision
145/// - x <= 33.3: use x + exp(-x) approximation
146/// - x > 33.3: exp(-x) is negligible, so log(1 + exp(x)) ≈ x
147#[inline]
148fn log1pexp(x: f64) -> f64 {
149 if x <= -37.0 {
150 x.exp()
151 } else if x <= 18.0 {
152 x.exp().ln_1p()
153 } else if x <= 33.3 {
154 x + (-x).exp()
155 } else {
156 x
157 }
158}
159
160/// Computes ln(1 - e^x) for x < 0 in a numerically stable way.
161///
162/// This is equivalent to fgbio's `log1mexp(a)` where `a = -x` (positive).
163/// Implements Equation (7) from <https://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf>
164///
165/// For x >= -ln(2): use ln(-expm1(x)) to avoid catastrophic cancellation when e^x is close to 1
166/// For x < -ln(2): use ln1p(-exp(x)) which is stable when e^x is small
167#[inline]
168fn ln_one_minus_exp(x: f64) -> f64 {
169 if x >= 0.0 {
170 f64::NEG_INFINITY
171 } else if x >= -LN_TWO {
172 // When x is close to 0 (x >= -ln(2)), e^x is close to 1, so 1 - e^x is close to 0
173 // Use expm1 to avoid catastrophic cancellation
174 // ln(1 - e^x) = ln(-(e^x - 1)) = ln(-expm1(x))
175 (-x.exp_m1()).ln()
176 } else {
177 // When x < -ln(2), e^x < 0.5, so 1 - e^x > 0.5, safe to use ln1p
178 // ln(1 - e^x) = ln(1 + (-e^x)) = ln1p(-e^x)
179 (-x.exp()).ln_1p()
180 }
181}
182
183/// Computes log(exp(a) - exp(b)) where a > b.
184///
185/// Matches fgbio's `aOrNotB` function.
186/// Uses: a + log1mexp(a - b) for numerical stability.
187#[inline]
188fn ln_a_minus_b(a: f64, b: f64) -> f64 {
189 if b.is_infinite() && b < 0.0 {
190 a
191 } else if (a - b).abs() < f64::EPSILON {
192 f64::NEG_INFINITY
193 } else {
194 // a + log(1 - exp(-(a-b))) = a + log1mexp(a-b)
195 // In our convention, ln_one_minus_exp takes negative values, but here b < a
196 a + ln_one_minus_exp(b - a)
197 }
198}
199
200/// Computes the probability of observing an error across two independent trials.
201///
202/// Matches fgbio's `probabilityOfErrorTwoTrials` exactly.
203///
204/// Given two error probabilities p1 and p2 (in log space), this models the probability
205/// of either or both reads being in error. For DNA sequencing with 4 bases:
206///
207/// f(X, Y) = X + Y - 4/3 * X * Y
208///
209/// where the 4/3 factor accounts for the probability of two errors resulting in
210/// the same incorrect base (1/3 chance).
211///
212/// This is equivalent to:
213/// P(error) = p1*(1-p2) + (1-p1)*p2 + p1*p2*(2/3)
214///
215/// The quick approximation is used when one probability is much smaller than the other
216/// (difference of 6 or more in log space corresponds to ~400x difference).
217///
218/// # Examples
219/// ```
220/// use fgumi_lib::phred::ln_error_prob_two_trials;
221///
222/// // If both trials have 10% error, combined error using 4/3 formula:
223/// // f(0.1, 0.1) = 0.1 + 0.1 - 4/3*0.1*0.1 = 0.2 - 0.0133... ≈ 0.1867
224/// let ln_p1 = 0.1_f64.ln();
225/// let ln_p2 = 0.1_f64.ln();
226/// let result = ln_error_prob_two_trials(ln_p1, ln_p2);
227/// let expected = 0.1 + 0.1 - (4.0/3.0) * 0.1 * 0.1;
228/// assert!((result.exp() - expected).abs() < 1e-10);
229/// ```
230#[must_use]
231pub fn ln_error_prob_two_trials(ln_p1: LogProbability, ln_p2: LogProbability) -> LogProbability {
232 // Match fgbio's probabilityOfErrorTwoTrials exactly
233 // Ensure ln_p1 >= ln_p2 (larger error probability first)
234 let (ln_p1, ln_p2) = if ln_p1 < ln_p2 { (ln_p2, ln_p1) } else { (ln_p1, ln_p2) };
235
236 // Quick approximation when one error probability is much smaller
237 // A difference of 6 in log space is ~400x in linear space
238 if ln_p1 - ln_p2 >= 6.0 {
239 return ln_p1;
240 }
241
242 // Full formula: f(X, Y) = X + Y - 4/3 * X * Y
243 // In log space:
244 // term1 = log(X + Y) = ln_sum_exp(ln_p1, ln_p2)
245 // term2 = log(4/3 * X * Y) = LN_FOUR_THIRDS + ln_p1 + ln_p2
246 // result = log(term1 - term2) = ln_a_minus_b(term1, term2)
247 let term1 = ln_sum_exp(ln_p1, ln_p2); // log(X + Y)
248 let term2 = LN_FOUR_THIRDS + ln_p1 + ln_p2; // log(4/3 * X * Y)
249 ln_a_minus_b(term1, term2)
250}
251
252/// Computes log(a + b) given log(a) and log(b)
253///
254/// Matches fgbio's `or` function using log1pexp for numerical stability:
255/// log(a + b) = log(a) + log(1 + exp(log(b) - log(a)))
256/// = log(a) + log1pexp(log(b) - log(a))
257///
258/// This is essential for adding probabilities while working in log-space, avoiding
259/// numerical underflow that would occur when converting back to linear space.
260///
261/// # Examples
262/// ```
263/// use fgumi_lib::phred::ln_sum_exp;
264///
265/// // ln(0.1) + ln(0.2) should give ln(0.3)
266/// let result = ln_sum_exp(0.1_f64.ln(), 0.2_f64.ln());
267/// assert!((result - 0.3_f64.ln()).abs() < 1e-10);
268///
269/// // Works with very small probabilities
270/// let result = ln_sum_exp(1e-100_f64.ln(), 2e-100_f64.ln());
271/// assert!((result - 3e-100_f64.ln()).abs() < 1e-10);
272/// ```
273#[must_use]
274pub fn ln_sum_exp(ln_a: LogProbability, ln_b: LogProbability) -> LogProbability {
275 if ln_a.is_infinite() && ln_a < 0.0 {
276 return ln_b;
277 }
278 if ln_b.is_infinite() && ln_b < 0.0 {
279 return ln_a;
280 }
281
282 // Match fgbio's `or` function: ensure ln_a <= ln_b, then use log1pexp
283 let (ln_a, ln_b) = if ln_b < ln_a { (ln_b, ln_a) } else { (ln_a, ln_b) };
284 ln_a + log1pexp(ln_b - ln_a)
285}
286
287/// Computes log(sum(exp(values))) for an array of log probabilities
288///
289/// This is the normalization constant for converting log-likelihoods to
290/// log-probabilities (posteriors). Uses the log-sum-exp trick for numerical
291/// stability across all values.
292///
293/// # Examples
294/// ```
295/// use fgumi_lib::phred::ln_sum_exp_array;
296///
297/// // ln(0.1) + ln(0.2) + ln(0.3) should give ln(0.6)
298/// let values = vec![0.1_f64.ln(), 0.2_f64.ln(), 0.3_f64.ln()];
299/// let result = ln_sum_exp_array(&values);
300/// assert!((result - 0.6_f64.ln()).abs() < 1e-10);
301///
302/// // Empty array returns negative infinity
303/// let result = ln_sum_exp_array(&[]);
304/// assert!(result.is_infinite() && result < 0.0);
305/// ```
306#[must_use]
307pub fn ln_sum_exp_array(values: &[LogProbability]) -> LogProbability {
308 if values.is_empty() {
309 return f64::NEG_INFINITY;
310 }
311
312 let mut min_value = f64::INFINITY;
313 let mut min_index = 0;
314 for (i, value) in values.iter().enumerate() {
315 if *value < min_value {
316 min_index = i;
317 min_value = *value;
318 }
319 }
320 if min_value.is_infinite() {
321 return min_value;
322 }
323 let mut sum = min_value;
324 for (i, value) in values.iter().enumerate() {
325 if i != min_index {
326 sum = ln_sum_exp(sum, *value);
327 }
328 }
329 sum
330}
331
332/// Normalizes a log probability by a normalization constant
333///
334/// Returns `ln(exp(ln_value)` / `exp(ln_norm)`) = `ln_value` - `ln_norm`
335#[inline]
336#[must_use]
337pub fn ln_normalize(ln_value: LogProbability, ln_norm: LogProbability) -> LogProbability {
338 ln_value - ln_norm
339}
340
341/// Computes ln(1 - exp(x)) = ln(NOT(exp(x)))
342#[inline]
343#[must_use]
344pub fn ln_not(x: LogProbability) -> LogProbability {
345 ln_one_minus_exp(x)
346}
347
348#[cfg(test)]
349#[expect(
350 clippy::similar_names,
351 reason = "test variables use short math-related names like ln_p, ln_q"
352)]
353mod tests {
354 use super::*;
355
356 #[test]
357 fn test_phred_to_ln_error() {
358 // Q10 = 10% error = 0.1
359 let q10_error = phred_to_ln_error_prob(10);
360 assert!((q10_error - 0.1_f64.ln()).abs() < 1e-10);
361
362 // Q20 = 1% error = 0.01
363 let q20_error = phred_to_ln_error_prob(20);
364 assert!((q20_error - 0.01_f64.ln()).abs() < 1e-10);
365
366 // Q30 = 0.1% error = 0.001
367 let q30_error = phred_to_ln_error_prob(30);
368 assert!((q30_error - 0.001_f64.ln()).abs() < 1e-10);
369 }
370
371 #[test]
372 fn test_phred_round_trip() {
373 // Note: Q0 and Q1 will be clamped to MIN_PHRED (2) on the round trip
374 for q in [MIN_PHRED, 10, 20, 30, 40, 50, 60] {
375 let ln_p = phred_to_ln_error_prob(q);
376 let q_back = ln_prob_to_phred(ln_p);
377 assert_eq!(q, q_back);
378 }
379 }
380
381 #[test]
382 fn test_ln_sum_exp() {
383 // ln(0.1) + ln(0.2) should give ln(0.3)
384 let result = ln_sum_exp(0.1_f64.ln(), 0.2_f64.ln());
385 assert!((result - 0.3_f64.ln()).abs() < 1e-10);
386 }
387
388 #[test]
389 fn test_ln_sum_exp_array() {
390 // ln(0.1) + ln(0.2) + ln(0.3) should give ln(0.6)
391 let values = vec![0.1_f64.ln(), 0.2_f64.ln(), 0.3_f64.ln()];
392 let result = ln_sum_exp_array(&values);
393 assert!((result - 0.6_f64.ln()).abs() < 1e-10);
394 }
395
396 #[test]
397 fn test_error_two_trials() {
398 // If both trials have 10% error, combined error using 4/3 formula:
399 // f(0.1, 0.1) = 0.1 + 0.1 - 4/3*0.1*0.1 = 0.2 - 0.01333... ≈ 0.18667
400 let ln_p1 = 0.1_f64.ln();
401 let ln_p2 = 0.1_f64.ln();
402 let result = ln_error_prob_two_trials(ln_p1, ln_p2);
403 let expected = 0.1 + 0.1 - (4.0 / 3.0) * 0.1 * 0.1;
404 assert!((result.exp() - expected).abs() < 1e-10);
405 }
406
407 /// Ported from fgbio's NumericTypesTest.scala
408 /// Tests the comprehensive probabilityOfErrorTwoTrials formula
409 #[test]
410 fn test_error_two_trials_comprehensive_fgbio() {
411 // From fgbio: for (i <- 1 to 100; j <- 1 to 100)
412 // expected = (p1*(1-p2)) + ((1-p1)*p2) + (p1 * p2 * 2/3)
413 // This simplifies to: p1 + p2 - 4/3*p1*p2
414 for i in 1..=100 {
415 for j in 1..=100 {
416 let p1 = 1.0 / f64::from(i);
417 let p2 = 1.0 / f64::from(j);
418 let expected = (p1 * (1.0 - p2)) + ((1.0 - p1) * p2) + (p1 * p2 * (2.0 / 3.0));
419 let ln_p1 = p1.ln();
420 let ln_p2 = p2.ln();
421 let actual = ln_error_prob_two_trials(ln_p1, ln_p2);
422 assert!(
423 (actual.exp() - expected).abs() < 0.0001,
424 "Failed for i={}, j={}: actual={}, expected={}",
425 i,
426 j,
427 actual.exp(),
428 expected
429 );
430 }
431 }
432 }
433
434 /// Ported from fgbio's `NumericTypesTest`: "or in log space"
435 #[test]
436 fn test_ln_sum_exp_fgbio() {
437 // exp(or(10, 10)) shouldBe exp(10)*2
438 let result = ln_sum_exp(10.0, 10.0);
439 assert!((result.exp() - 10.0_f64.exp() * 2.0).abs() < 0.00001);
440
441 // exp(or(10, 20)) shouldBe exp(10)+exp(20)
442 let result = ln_sum_exp(10.0, 20.0);
443 assert!((result.exp() - (10.0_f64.exp() + 20.0_f64.exp())).abs() < 0.00001);
444
445 // exp(or(20, 10)) shouldBe exp(20)+exp(10)
446 let result = ln_sum_exp(20.0, 10.0);
447 assert!((result.exp() - (20.0_f64.exp() + 10.0_f64.exp())).abs() < 0.00001);
448
449 // or(10, LnZero) shouldBe exp(10)
450 let result = ln_sum_exp(10.0, f64::NEG_INFINITY);
451 assert!((result.exp() - 10.0_f64.exp()).abs() < 0.00001);
452
453 // or(LnZero, 10) shouldBe exp(10)
454 let result = ln_sum_exp(f64::NEG_INFINITY, 10.0);
455 assert!((result.exp() - 10.0_f64.exp()).abs() < 0.00001);
456
457 // or(-718.39..., -8.40...) shouldBe -8.40... (very small + small = small)
458 let result = ln_sum_exp(-718.394_775_628_242_3, -8.404_216_861_178_751);
459 assert!((result - (-8.404_216_861_178_751)).abs() < 0.00001);
460 }
461
462 /// Ported from fgbio's `NumericTypesTest`: "aOrNotB in log space"
463 #[test]
464 fn test_ln_a_minus_b_fgbio() {
465 let q10 = phred_to_ln_error_prob(10); // ln(0.1)
466 let q20 = phred_to_ln_error_prob(20); // ln(0.01)
467
468 // aOrNotB(10, 10) shouldBe LnZero
469 assert!(ln_a_minus_b(10.0, 10.0).is_infinite() && ln_a_minus_b(10.0, 10.0) < 0.0);
470
471 // aOrNotB(q10, q10) shouldBe LnZero
472 assert!(ln_a_minus_b(q10, q10).is_infinite() && ln_a_minus_b(q10, q10) < 0.0);
473
474 // exp(aOrNotB(q10, q20)) shouldBe (0.1-0.01)
475 let result = ln_a_minus_b(q10, q20);
476 assert!((result.exp() - 0.09).abs() < 0.00001, "Expected 0.09, got {}", result.exp());
477
478 // aOrNotB(LnTen, LnZero) shouldBe LnTen
479 let ln_ten = 10.0_f64.ln();
480 let result = ln_a_minus_b(ln_ten, f64::NEG_INFINITY);
481 assert!((result - ln_ten).abs() < 0.00001);
482 }
483
484 /// Ported from fgbio's `NumericTypesTest`: "1 - probability in log space"
485 #[test]
486 fn test_ln_one_minus_exp_fgbio() {
487 let q10 = phred_to_ln_error_prob(10); // ln(0.1)
488 let q20 = phred_to_ln_error_prob(20); // ln(0.01)
489
490 // exp(not(q10)) shouldBe 0.9
491 let result = ln_one_minus_exp(q10);
492 assert!((result.exp() - 0.9).abs() < 0.00001);
493
494 // exp(not(q20)) shouldBe 0.99
495 let result = ln_one_minus_exp(q20);
496 assert!((result.exp() - 0.99).abs() < 0.00001);
497
498 // exp(not(ln(0.90))) shouldBe 0.1
499 let result = ln_one_minus_exp(0.90_f64.ln());
500 assert!((result.exp() - 0.1).abs() < 0.00001);
501
502 // exp(not(ln(0.99))) shouldBe 0.01
503 let result = ln_one_minus_exp(0.99_f64.ln());
504 assert!((result.exp() - 0.01).abs() < 0.00001);
505
506 // exp(not(LnZero)) shouldBe 1.0
507 let result = ln_one_minus_exp(f64::NEG_INFINITY);
508 assert!((result.exp() - 1.0).abs() < 0.00001);
509 }
510
511 /// Ported from fgbio's `NumericTypesTest`: phred score conversions
512 #[test]
513 fn test_phred_conversions_fgbio() {
514 // fromLogProbability(toLogProbability(0.0)) shouldBe MaxValue
515 let ln_zero = 0.0_f64.ln(); // NEG_INFINITY
516 assert_eq!(ln_prob_to_phred(ln_zero), MAX_PHRED);
517
518 // fromLogProbability(toLogProbability(0.1)) shouldBe 10
519 let ln_01 = 0.1_f64.ln();
520 assert_eq!(ln_prob_to_phred(ln_01), 10);
521
522 // fromLogProbability(toLogProbability(0.5)) shouldBe 3
523 let ln_05 = 0.5_f64.ln();
524 assert_eq!(ln_prob_to_phred(ln_05), 3);
525
526 // fromLogProbability(toLogProbability(1.0)) shouldBe 0
527 let ln_1 = 1.0_f64.ln(); // 0.0
528 // Note: fgbio returns 0, but our MIN_PHRED is 2, so we clamp to 2
529 assert_eq!(ln_prob_to_phred(ln_1), MIN_PHRED);
530 }
531
532 /// Test log1pexp implementation (Equation 10)
533 #[test]
534 fn test_log1pexp() {
535 // Test threshold regions
536 // x <= -37: returns exp(x)
537 assert!((log1pexp(-50.0) - (-50.0_f64).exp()).abs() < 1e-10);
538 assert!((log1pexp(-37.0) - (-37.0_f64).exp().ln_1p()).abs() < 1e-10);
539
540 // x <= 18: returns log1p(exp(x))
541 assert!((log1pexp(0.0) - (1.0_f64 + 1.0).ln()).abs() < 1e-10);
542 assert!((log1pexp(10.0) - (1.0 + 10.0_f64.exp()).ln()).abs() < 1e-10);
543
544 // x <= 33.3: returns x + exp(-x)
545 assert!((log1pexp(25.0) - (25.0 + (-25.0_f64).exp())).abs() < 1e-10);
546
547 // x > 33.3: returns x
548 assert!((log1pexp(40.0) - 40.0).abs() < 1e-10);
549 assert!((log1pexp(100.0) - 100.0).abs() < 1e-10);
550 }
551
552 #[test]
553 fn test_ln_normalize() {
554 // Normalizing ln(0.2) by ln(0.5) should give ln(0.4)
555 let result = ln_normalize(0.2_f64.ln(), 0.5_f64.ln());
556 assert!((result - 0.4_f64.ln()).abs() < 1e-10);
557 }
558
559 // =====================================================================
560 // Additional edge case tests
561 // =====================================================================
562
563 #[test]
564 fn test_phred_boundary_values() {
565 // Test boundary Phred scores
566 // Q0 = 100% error (probability = 1.0)
567 let q0_error = phred_to_ln_error_prob(0);
568 assert!((q0_error - 0.0).abs() < 1e-10); // ln(1.0) = 0
569
570 // Q2 = MIN_PHRED
571 let q2_error = phred_to_ln_error_prob(MIN_PHRED);
572 let expected_q2 = 10.0_f64.powf(-0.2); // 10^(-2/10) ≈ 0.631
573 assert!((q2_error.exp() - expected_q2).abs() < 1e-6);
574
575 // Q93 = MAX_PHRED
576 let q93_error = phred_to_ln_error_prob(MAX_PHRED);
577 let expected_q93 = 10.0_f64.powf(-9.3); // 10^(-93/10) ≈ 5e-10
578 assert!((q93_error.exp() - expected_q93).abs() < 1e-15);
579 }
580
581 #[test]
582 fn test_phred_to_ln_correct_boundary() {
583 // Q0: error = 1.0, correct = 0.0 → ln(0) = -∞
584 let q0_correct = phred_to_ln_correct_prob(0);
585 assert!(q0_correct.is_infinite() && q0_correct < 0.0);
586
587 // Q93: error ≈ 5e-10, correct ≈ 1.0 → ln(correct) ≈ 0
588 let q93_correct = phred_to_ln_correct_prob(MAX_PHRED);
589 assert!(q93_correct < 0.0 && q93_correct.abs() < 1e-9);
590 }
591
592 #[test]
593 fn test_ln_prob_to_phred_clamping() {
594 // Very high error (low quality) should clamp to MIN_PHRED
595 let very_high_error = 0.9_f64.ln(); // 90% error
596 assert_eq!(ln_prob_to_phred(very_high_error), MIN_PHRED);
597
598 // 100% error should also clamp to MIN_PHRED
599 // ln(1.0) = 0, which gives Q = 0, clamped to MIN_PHRED
600 assert_eq!(ln_prob_to_phred(0.0), MIN_PHRED);
601
602 // Very low error should clamp to MAX_PHRED
603 let very_low_error = 1e-15_f64.ln();
604 assert_eq!(ln_prob_to_phred(very_low_error), MAX_PHRED);
605 }
606
607 #[test]
608 fn test_ln_sum_exp_array_edge_cases() {
609 // Empty array
610 let result = ln_sum_exp_array(&[]);
611 assert!(result.is_infinite() && result < 0.0);
612
613 // Single element
614 let result = ln_sum_exp_array(&[0.5_f64.ln()]);
615 assert!((result - 0.5_f64.ln()).abs() < 1e-10);
616
617 // Two elements (should match ln_sum_exp)
618 let a = 0.2_f64.ln();
619 let b = 0.3_f64.ln();
620 let array_result = ln_sum_exp_array(&[a, b]);
621 let pair_result = ln_sum_exp(a, b);
622 assert!((array_result - pair_result).abs() < 1e-10);
623
624 // All negative infinity
625 let result = ln_sum_exp_array(&[f64::NEG_INFINITY, f64::NEG_INFINITY]);
626 assert!(result.is_infinite() && result < 0.0);
627 }
628
629 #[test]
630 fn test_ln_sum_exp_very_small_values() {
631 // Very small probabilities (should not underflow)
632 let a = 1e-300_f64.ln();
633 let b = 2e-300_f64.ln();
634 let result = ln_sum_exp(a, b);
635 let expected = 3e-300_f64.ln();
636 assert!((result - expected).abs() < 1.0); // Large tolerance for extreme values
637 }
638
639 #[test]
640 fn test_ln_not_edge_cases() {
641 // ln_not(ln(0)) = ln(1 - 0) = ln(1) = 0
642 let result = ln_not(f64::NEG_INFINITY);
643 assert!((result - 0.0).abs() < 1e-10);
644
645 // ln_not(ln(1)) = ln(1 - 1) = ln(0) = -∞
646 let result = ln_not(0.0);
647 assert!(result.is_infinite() && result < 0.0);
648
649 // ln_not(ln(0.5)) = ln(0.5)
650 let result = ln_not(0.5_f64.ln());
651 assert!((result - 0.5_f64.ln()).abs() < 1e-10);
652
653 // Positive value (invalid probability) should return -∞
654 let result = ln_not(1.0);
655 assert!(result.is_infinite() && result < 0.0);
656 }
657
658 #[test]
659 fn test_error_two_trials_quick_approximation() {
660 // When one probability is much smaller (>6 in log space), use quick approximation
661 let large_error = 0.5_f64.ln(); // ln(0.5) ≈ -0.693
662 let small_error = 1e-6_f64.ln(); // ln(1e-6) ≈ -13.8
663
664 let result = ln_error_prob_two_trials(large_error, small_error);
665 // Should approximately equal the larger error
666 assert!((result - large_error).abs() < 0.01);
667
668 // Also test with arguments swapped
669 let result2 = ln_error_prob_two_trials(small_error, large_error);
670 assert!((result2 - large_error).abs() < 0.01);
671 }
672
673 #[test]
674 fn test_log1pexp_boundary_values() {
675 // At threshold -37
676 let at_threshold = log1pexp(-37.0);
677 assert!(at_threshold > 0.0);
678 assert!(at_threshold < 1e-15);
679
680 // At threshold 18
681 let at_18 = log1pexp(18.0);
682 let expected = (1.0 + 18.0_f64.exp()).ln();
683 assert!((at_18 - expected).abs() < 1e-10);
684
685 // At threshold 33.3
686 let at_33 = log1pexp(33.3);
687 let expected = 33.3 + (-33.3_f64).exp();
688 assert!((at_33 - expected).abs() < 1e-10);
689 }
690
691 #[test]
692 fn test_constants() {
693 // Verify constants are correct
694 assert!((LN_TWO - std::f64::consts::LN_2).abs() < 1e-15);
695 assert!((LN_FOUR_THIRDS - (4.0_f64 / 3.0).ln()).abs() < 1e-15);
696 assert!(LN_ONE.abs() < 1e-15); // LN_ONE should be 0.0
697 assert_eq!(MIN_PHRED, 2);
698 assert_eq!(MAX_PHRED, 93);
699 assert_eq!(NO_CALL_BASE, b'N');
700 assert_eq!(NO_CALL_BASE_LOWER, b'n');
701 }
702}