fgumi_consensus/
phred.rs

1//! Phred score utilities and probability calculations.
2//!
3//! This module provides functions for working with Phred quality scores and converting
4//! between Phred scores and probabilities. All probability calculations are done in
5//! log-space (natural log) for numerical stability.
6//!
7//! The implementations in this module are designed to exactly match fgbio's
8//! NumericTypes.scala for consistent consensus calling results.
9//!
10//! Key references:
11//! - Equation (7) and (10) from <https://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf>
12
13use std::f64::consts::LN_10;
14
15/// Natural log of 2, used as threshold in log1mexp (Equation 7)
16const LN_TWO: f64 = std::f64::consts::LN_2; // ln(2) ≈ 0.693
17
18/// Natural log of 4/3, used in probabilityOfErrorTwoTrials
19const LN_FOUR_THIRDS: f64 = 0.287_682_072_451_780_9; // ln(4/3)
20
21/// Natural log of 1.0 (which is 0)
22pub const LN_ONE: f64 = 0.0;
23
24// Re-export shared constants from fgumi-dna
25pub use fgumi_dna::{MIN_PHRED, NO_CALL_BASE, NO_CALL_BASE_LOWER};
26
27/// Maximum Phred score we handle (Q93, matching `SAMUtils.MAX_PHRED_SCORE`)
28pub const MAX_PHRED: u8 = 93;
29
30/// Precision constant used in Phred score conversion (matching fgbio)
31const PHRED_PRECISION: f64 = 0.001;
32
33/// Precomputed `phred_to_ln_error_prob(MAX_PHRED)` used as a threshold in `ln_prob_to_phred`.
34const MAX_PHRED_AS_LN_ERROR: f64 = -(MAX_PHRED as f64) * LN_10 / 10.0;
35
36/// Phred score type
37pub type PhredScore = u8;
38
39/// Log probability type (natural log)
40pub type LogProbability = f64;
41
42/// Converts a Phred score to a log probability of error
43///
44/// Phred score Q relates to error probability P by: Q = -10 * log10(P)
45/// So: P = 10^(-Q/10)
46/// And: ln(P) = ln(10^(-Q/10)) = -Q * ln(10) / 10
47///
48/// # Examples
49/// ```
50/// use fgumi_lib::phred::phred_to_ln_error_prob;
51///
52/// // Q10 corresponds to 10% error rate
53/// let ln_error = phred_to_ln_error_prob(10);
54/// assert!((ln_error - 0.1_f64.ln()).abs() < 1e-10);
55///
56/// // Q20 corresponds to 1% error rate
57/// let ln_error = phred_to_ln_error_prob(20);
58/// assert!((ln_error - 0.01_f64.ln()).abs() < 1e-10);
59///
60/// // Q30 corresponds to 0.1% error rate
61/// let ln_error = phred_to_ln_error_prob(30);
62/// assert!((ln_error - 0.001_f64.ln()).abs() < 1e-10);
63/// ```
64#[inline]
65#[must_use]
66pub fn phred_to_ln_error_prob(phred: PhredScore) -> LogProbability {
67    -f64::from(phred) * LN_10 / 10.0
68}
69
70/// Converts a Phred score to a log probability of being correct (1 - error)
71///
72/// For numerical stability with high quality scores, we use:
73/// ln(1 - P) ≈ ln(1 - e^ln(P))
74///
75/// # Examples
76/// ```
77/// use fgumi_lib::phred::phred_to_ln_correct_prob;
78///
79/// // Q30 = 0.1% error, so 99.9% correct
80/// let ln_correct = phred_to_ln_correct_prob(30);
81/// assert!((ln_correct - 0.999_f64.ln()).abs() < 1e-6);
82///
83/// // Q20 = 1% error, so 99% correct
84/// let ln_correct = phred_to_ln_correct_prob(20);
85/// assert!((ln_correct - 0.99_f64.ln()).abs() < 1e-6);
86/// ```
87#[inline]
88#[must_use]
89pub fn phred_to_ln_correct_prob(phred: PhredScore) -> LogProbability {
90    let ln_error = phred_to_ln_error_prob(phred);
91    ln_one_minus_exp(ln_error)
92}
93
94/// Converts a log probability to a Phred score
95///
96/// Q = -10 * log10(P) = -10 * ln(P) / ln(10)
97///
98/// Uses floor(value + precision) matching fgbio's PhredScore.fromLogProbability.
99/// The result is clamped to the valid Phred score range [2, 93].
100///
101/// # Examples
102/// ```
103/// use fgumi_lib::phred::ln_prob_to_phred;
104///
105/// // 1% error (0.01) corresponds to Q20
106/// let phred = ln_prob_to_phred(0.01_f64.ln());
107/// assert_eq!(phred, 20);
108///
109/// // 0.1% error (0.001) corresponds to Q30
110/// let phred = ln_prob_to_phred(0.001_f64.ln());
111/// assert_eq!(phred, 30);
112///
113/// // Very low error probability is clamped to Q93
114/// let phred = ln_prob_to_phred(1e-20_f64.ln());
115/// assert_eq!(phred, 93);
116/// ```
117#[inline]
118#[must_use]
119pub fn ln_prob_to_phred(ln_prob: LogProbability) -> PhredScore {
120    // Match fgbio's PhredScore.fromLogProbability:
121    // if (lnProbError < MaxValueAsLogDouble) MaxValue
122    // else Math.floor(-10.0 * (lnProbError/ Ln10) + Precision).toByte
123    if ln_prob < MAX_PHRED_AS_LN_ERROR {
124        return MAX_PHRED;
125    }
126    let phred = (-10.0 * ln_prob / LN_10 + PHRED_PRECISION).floor();
127    #[expect(
128        clippy::cast_possible_truncation,
129        clippy::cast_sign_loss,
130        reason = "value is clamped to [MIN_PHRED, MAX_PHRED] which fits in u8"
131    )]
132    {
133        phred.clamp(f64::from(MIN_PHRED), f64::from(MAX_PHRED)) as PhredScore
134    }
135}
136
137/// Precise computation of log(1 + exp(x)).
138///
139/// Implements Equation (10) from <https://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf>
140/// Matches fgbio's `log1pexp` function exactly.
141///
142/// Thresholds from the paper and fgbio:
143/// - x <= -37:   exp(x) is so small that log(1 + exp(x)) ≈ exp(x)
144/// - x <= 18:    use log1p(exp(x)) for precision
145/// - x <= 33.3:  use x + exp(-x) approximation
146/// - x > 33.3:   exp(-x) is negligible, so log(1 + exp(x)) ≈ x
147#[inline]
148fn log1pexp(x: f64) -> f64 {
149    if x <= -37.0 {
150        x.exp()
151    } else if x <= 18.0 {
152        x.exp().ln_1p()
153    } else if x <= 33.3 {
154        x + (-x).exp()
155    } else {
156        x
157    }
158}
159
160/// Computes ln(1 - e^x) for x < 0 in a numerically stable way.
161///
162/// This is equivalent to fgbio's `log1mexp(a)` where `a = -x` (positive).
163/// Implements Equation (7) from <https://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf>
164///
165/// For x >= -ln(2): use ln(-expm1(x)) to avoid catastrophic cancellation when e^x is close to 1
166/// For x < -ln(2):  use ln1p(-exp(x)) which is stable when e^x is small
167#[inline]
168fn ln_one_minus_exp(x: f64) -> f64 {
169    if x >= 0.0 {
170        f64::NEG_INFINITY
171    } else if x >= -LN_TWO {
172        // When x is close to 0 (x >= -ln(2)), e^x is close to 1, so 1 - e^x is close to 0
173        // Use expm1 to avoid catastrophic cancellation
174        // ln(1 - e^x) = ln(-(e^x - 1)) = ln(-expm1(x))
175        (-x.exp_m1()).ln()
176    } else {
177        // When x < -ln(2), e^x < 0.5, so 1 - e^x > 0.5, safe to use ln1p
178        // ln(1 - e^x) = ln(1 + (-e^x)) = ln1p(-e^x)
179        (-x.exp()).ln_1p()
180    }
181}
182
183/// Computes log(exp(a) - exp(b)) where a > b.
184///
185/// Matches fgbio's `aOrNotB` function.
186/// Uses: a + log1mexp(a - b) for numerical stability.
187#[inline]
188fn ln_a_minus_b(a: f64, b: f64) -> f64 {
189    if b.is_infinite() && b < 0.0 {
190        a
191    } else if (a - b).abs() < f64::EPSILON {
192        f64::NEG_INFINITY
193    } else {
194        // a + log(1 - exp(-(a-b))) = a + log1mexp(a-b)
195        // In our convention, ln_one_minus_exp takes negative values, but here b < a
196        a + ln_one_minus_exp(b - a)
197    }
198}
199
200/// Computes the probability of observing an error across two independent trials.
201///
202/// Matches fgbio's `probabilityOfErrorTwoTrials` exactly.
203///
204/// Given two error probabilities p1 and p2 (in log space), this models the probability
205/// of either or both reads being in error. For DNA sequencing with 4 bases:
206///
207/// f(X, Y) = X + Y - 4/3 * X * Y
208///
209/// where the 4/3 factor accounts for the probability of two errors resulting in
210/// the same incorrect base (1/3 chance).
211///
212/// This is equivalent to:
213/// P(error) = p1*(1-p2) + (1-p1)*p2 + p1*p2*(2/3)
214///
215/// The quick approximation is used when one probability is much smaller than the other
216/// (difference of 6 or more in log space corresponds to ~400x difference).
217///
218/// # Examples
219/// ```
220/// use fgumi_lib::phred::ln_error_prob_two_trials;
221///
222/// // If both trials have 10% error, combined error using 4/3 formula:
223/// // f(0.1, 0.1) = 0.1 + 0.1 - 4/3*0.1*0.1 = 0.2 - 0.0133... ≈ 0.1867
224/// let ln_p1 = 0.1_f64.ln();
225/// let ln_p2 = 0.1_f64.ln();
226/// let result = ln_error_prob_two_trials(ln_p1, ln_p2);
227/// let expected = 0.1 + 0.1 - (4.0/3.0) * 0.1 * 0.1;
228/// assert!((result.exp() - expected).abs() < 1e-10);
229/// ```
230#[must_use]
231pub fn ln_error_prob_two_trials(ln_p1: LogProbability, ln_p2: LogProbability) -> LogProbability {
232    // Match fgbio's probabilityOfErrorTwoTrials exactly
233    // Ensure ln_p1 >= ln_p2 (larger error probability first)
234    let (ln_p1, ln_p2) = if ln_p1 < ln_p2 { (ln_p2, ln_p1) } else { (ln_p1, ln_p2) };
235
236    // Quick approximation when one error probability is much smaller
237    // A difference of 6 in log space is ~400x in linear space
238    if ln_p1 - ln_p2 >= 6.0 {
239        return ln_p1;
240    }
241
242    // Full formula: f(X, Y) = X + Y - 4/3 * X * Y
243    // In log space:
244    // term1 = log(X + Y) = ln_sum_exp(ln_p1, ln_p2)
245    // term2 = log(4/3 * X * Y) = LN_FOUR_THIRDS + ln_p1 + ln_p2
246    // result = log(term1 - term2) = ln_a_minus_b(term1, term2)
247    let term1 = ln_sum_exp(ln_p1, ln_p2); // log(X + Y)
248    let term2 = LN_FOUR_THIRDS + ln_p1 + ln_p2; // log(4/3 * X * Y)
249    ln_a_minus_b(term1, term2)
250}
251
252/// Computes log(a + b) given log(a) and log(b)
253///
254/// Matches fgbio's `or` function using log1pexp for numerical stability:
255/// log(a + b) = log(a) + log(1 + exp(log(b) - log(a)))
256///            = log(a) + log1pexp(log(b) - log(a))
257///
258/// This is essential for adding probabilities while working in log-space, avoiding
259/// numerical underflow that would occur when converting back to linear space.
260///
261/// # Examples
262/// ```
263/// use fgumi_lib::phred::ln_sum_exp;
264///
265/// // ln(0.1) + ln(0.2) should give ln(0.3)
266/// let result = ln_sum_exp(0.1_f64.ln(), 0.2_f64.ln());
267/// assert!((result - 0.3_f64.ln()).abs() < 1e-10);
268///
269/// // Works with very small probabilities
270/// let result = ln_sum_exp(1e-100_f64.ln(), 2e-100_f64.ln());
271/// assert!((result - 3e-100_f64.ln()).abs() < 1e-10);
272/// ```
273#[must_use]
274pub fn ln_sum_exp(ln_a: LogProbability, ln_b: LogProbability) -> LogProbability {
275    if ln_a.is_infinite() && ln_a < 0.0 {
276        return ln_b;
277    }
278    if ln_b.is_infinite() && ln_b < 0.0 {
279        return ln_a;
280    }
281
282    // Match fgbio's `or` function: ensure ln_a <= ln_b, then use log1pexp
283    let (ln_a, ln_b) = if ln_b < ln_a { (ln_b, ln_a) } else { (ln_a, ln_b) };
284    ln_a + log1pexp(ln_b - ln_a)
285}
286
287/// Computes log(sum(exp(values))) for an array of log probabilities
288///
289/// This is the normalization constant for converting log-likelihoods to
290/// log-probabilities (posteriors). Uses the log-sum-exp trick for numerical
291/// stability across all values.
292///
293/// # Examples
294/// ```
295/// use fgumi_lib::phred::ln_sum_exp_array;
296///
297/// // ln(0.1) + ln(0.2) + ln(0.3) should give ln(0.6)
298/// let values = vec![0.1_f64.ln(), 0.2_f64.ln(), 0.3_f64.ln()];
299/// let result = ln_sum_exp_array(&values);
300/// assert!((result - 0.6_f64.ln()).abs() < 1e-10);
301///
302/// // Empty array returns negative infinity
303/// let result = ln_sum_exp_array(&[]);
304/// assert!(result.is_infinite() && result < 0.0);
305/// ```
306#[must_use]
307pub fn ln_sum_exp_array(values: &[LogProbability]) -> LogProbability {
308    if values.is_empty() {
309        return f64::NEG_INFINITY;
310    }
311
312    let mut min_value = f64::INFINITY;
313    let mut min_index = 0;
314    for (i, value) in values.iter().enumerate() {
315        if *value < min_value {
316            min_index = i;
317            min_value = *value;
318        }
319    }
320    if min_value.is_infinite() {
321        return min_value;
322    }
323    let mut sum = min_value;
324    for (i, value) in values.iter().enumerate() {
325        if i != min_index {
326            sum = ln_sum_exp(sum, *value);
327        }
328    }
329    sum
330}
331
332/// Normalizes a log probability by a normalization constant
333///
334/// Returns `ln(exp(ln_value)` / `exp(ln_norm)`) = `ln_value` - `ln_norm`
335#[inline]
336#[must_use]
337pub fn ln_normalize(ln_value: LogProbability, ln_norm: LogProbability) -> LogProbability {
338    ln_value - ln_norm
339}
340
341/// Computes ln(1 - exp(x)) = ln(NOT(exp(x)))
342#[inline]
343#[must_use]
344pub fn ln_not(x: LogProbability) -> LogProbability {
345    ln_one_minus_exp(x)
346}
347
348#[cfg(test)]
349#[expect(
350    clippy::similar_names,
351    reason = "test variables use short math-related names like ln_p, ln_q"
352)]
353mod tests {
354    use super::*;
355
356    #[test]
357    fn test_phred_to_ln_error() {
358        // Q10 = 10% error = 0.1
359        let q10_error = phred_to_ln_error_prob(10);
360        assert!((q10_error - 0.1_f64.ln()).abs() < 1e-10);
361
362        // Q20 = 1% error = 0.01
363        let q20_error = phred_to_ln_error_prob(20);
364        assert!((q20_error - 0.01_f64.ln()).abs() < 1e-10);
365
366        // Q30 = 0.1% error = 0.001
367        let q30_error = phred_to_ln_error_prob(30);
368        assert!((q30_error - 0.001_f64.ln()).abs() < 1e-10);
369    }
370
371    #[test]
372    fn test_phred_round_trip() {
373        // Note: Q0 and Q1 will be clamped to MIN_PHRED (2) on the round trip
374        for q in [MIN_PHRED, 10, 20, 30, 40, 50, 60] {
375            let ln_p = phred_to_ln_error_prob(q);
376            let q_back = ln_prob_to_phred(ln_p);
377            assert_eq!(q, q_back);
378        }
379    }
380
381    #[test]
382    fn test_ln_sum_exp() {
383        // ln(0.1) + ln(0.2) should give ln(0.3)
384        let result = ln_sum_exp(0.1_f64.ln(), 0.2_f64.ln());
385        assert!((result - 0.3_f64.ln()).abs() < 1e-10);
386    }
387
388    #[test]
389    fn test_ln_sum_exp_array() {
390        // ln(0.1) + ln(0.2) + ln(0.3) should give ln(0.6)
391        let values = vec![0.1_f64.ln(), 0.2_f64.ln(), 0.3_f64.ln()];
392        let result = ln_sum_exp_array(&values);
393        assert!((result - 0.6_f64.ln()).abs() < 1e-10);
394    }
395
396    #[test]
397    fn test_error_two_trials() {
398        // If both trials have 10% error, combined error using 4/3 formula:
399        // f(0.1, 0.1) = 0.1 + 0.1 - 4/3*0.1*0.1 = 0.2 - 0.01333... ≈ 0.18667
400        let ln_p1 = 0.1_f64.ln();
401        let ln_p2 = 0.1_f64.ln();
402        let result = ln_error_prob_two_trials(ln_p1, ln_p2);
403        let expected = 0.1 + 0.1 - (4.0 / 3.0) * 0.1 * 0.1;
404        assert!((result.exp() - expected).abs() < 1e-10);
405    }
406
407    /// Ported from fgbio's NumericTypesTest.scala
408    /// Tests the comprehensive probabilityOfErrorTwoTrials formula
409    #[test]
410    fn test_error_two_trials_comprehensive_fgbio() {
411        // From fgbio: for (i <- 1 to 100; j <- 1 to 100)
412        // expected = (p1*(1-p2)) + ((1-p1)*p2) + (p1 * p2 * 2/3)
413        // This simplifies to: p1 + p2 - 4/3*p1*p2
414        for i in 1..=100 {
415            for j in 1..=100 {
416                let p1 = 1.0 / f64::from(i);
417                let p2 = 1.0 / f64::from(j);
418                let expected = (p1 * (1.0 - p2)) + ((1.0 - p1) * p2) + (p1 * p2 * (2.0 / 3.0));
419                let ln_p1 = p1.ln();
420                let ln_p2 = p2.ln();
421                let actual = ln_error_prob_two_trials(ln_p1, ln_p2);
422                assert!(
423                    (actual.exp() - expected).abs() < 0.0001,
424                    "Failed for i={}, j={}: actual={}, expected={}",
425                    i,
426                    j,
427                    actual.exp(),
428                    expected
429                );
430            }
431        }
432    }
433
434    /// Ported from fgbio's `NumericTypesTest`: "or in log space"
435    #[test]
436    fn test_ln_sum_exp_fgbio() {
437        // exp(or(10, 10)) shouldBe exp(10)*2
438        let result = ln_sum_exp(10.0, 10.0);
439        assert!((result.exp() - 10.0_f64.exp() * 2.0).abs() < 0.00001);
440
441        // exp(or(10, 20)) shouldBe exp(10)+exp(20)
442        let result = ln_sum_exp(10.0, 20.0);
443        assert!((result.exp() - (10.0_f64.exp() + 20.0_f64.exp())).abs() < 0.00001);
444
445        // exp(or(20, 10)) shouldBe exp(20)+exp(10)
446        let result = ln_sum_exp(20.0, 10.0);
447        assert!((result.exp() - (20.0_f64.exp() + 10.0_f64.exp())).abs() < 0.00001);
448
449        // or(10, LnZero) shouldBe exp(10)
450        let result = ln_sum_exp(10.0, f64::NEG_INFINITY);
451        assert!((result.exp() - 10.0_f64.exp()).abs() < 0.00001);
452
453        // or(LnZero, 10) shouldBe exp(10)
454        let result = ln_sum_exp(f64::NEG_INFINITY, 10.0);
455        assert!((result.exp() - 10.0_f64.exp()).abs() < 0.00001);
456
457        // or(-718.39..., -8.40...) shouldBe -8.40... (very small + small = small)
458        let result = ln_sum_exp(-718.394_775_628_242_3, -8.404_216_861_178_751);
459        assert!((result - (-8.404_216_861_178_751)).abs() < 0.00001);
460    }
461
462    /// Ported from fgbio's `NumericTypesTest`: "aOrNotB in log space"
463    #[test]
464    fn test_ln_a_minus_b_fgbio() {
465        let q10 = phred_to_ln_error_prob(10); // ln(0.1)
466        let q20 = phred_to_ln_error_prob(20); // ln(0.01)
467
468        // aOrNotB(10, 10) shouldBe LnZero
469        assert!(ln_a_minus_b(10.0, 10.0).is_infinite() && ln_a_minus_b(10.0, 10.0) < 0.0);
470
471        // aOrNotB(q10, q10) shouldBe LnZero
472        assert!(ln_a_minus_b(q10, q10).is_infinite() && ln_a_minus_b(q10, q10) < 0.0);
473
474        // exp(aOrNotB(q10, q20)) shouldBe (0.1-0.01)
475        let result = ln_a_minus_b(q10, q20);
476        assert!((result.exp() - 0.09).abs() < 0.00001, "Expected 0.09, got {}", result.exp());
477
478        // aOrNotB(LnTen, LnZero) shouldBe LnTen
479        let ln_ten = 10.0_f64.ln();
480        let result = ln_a_minus_b(ln_ten, f64::NEG_INFINITY);
481        assert!((result - ln_ten).abs() < 0.00001);
482    }
483
484    /// Ported from fgbio's `NumericTypesTest`: "1 - probability in log space"
485    #[test]
486    fn test_ln_one_minus_exp_fgbio() {
487        let q10 = phred_to_ln_error_prob(10); // ln(0.1)
488        let q20 = phred_to_ln_error_prob(20); // ln(0.01)
489
490        // exp(not(q10)) shouldBe 0.9
491        let result = ln_one_minus_exp(q10);
492        assert!((result.exp() - 0.9).abs() < 0.00001);
493
494        // exp(not(q20)) shouldBe 0.99
495        let result = ln_one_minus_exp(q20);
496        assert!((result.exp() - 0.99).abs() < 0.00001);
497
498        // exp(not(ln(0.90))) shouldBe 0.1
499        let result = ln_one_minus_exp(0.90_f64.ln());
500        assert!((result.exp() - 0.1).abs() < 0.00001);
501
502        // exp(not(ln(0.99))) shouldBe 0.01
503        let result = ln_one_minus_exp(0.99_f64.ln());
504        assert!((result.exp() - 0.01).abs() < 0.00001);
505
506        // exp(not(LnZero)) shouldBe 1.0
507        let result = ln_one_minus_exp(f64::NEG_INFINITY);
508        assert!((result.exp() - 1.0).abs() < 0.00001);
509    }
510
511    /// Ported from fgbio's `NumericTypesTest`: phred score conversions
512    #[test]
513    fn test_phred_conversions_fgbio() {
514        // fromLogProbability(toLogProbability(0.0)) shouldBe MaxValue
515        let ln_zero = 0.0_f64.ln(); // NEG_INFINITY
516        assert_eq!(ln_prob_to_phred(ln_zero), MAX_PHRED);
517
518        // fromLogProbability(toLogProbability(0.1)) shouldBe 10
519        let ln_01 = 0.1_f64.ln();
520        assert_eq!(ln_prob_to_phred(ln_01), 10);
521
522        // fromLogProbability(toLogProbability(0.5)) shouldBe 3
523        let ln_05 = 0.5_f64.ln();
524        assert_eq!(ln_prob_to_phred(ln_05), 3);
525
526        // fromLogProbability(toLogProbability(1.0)) shouldBe 0
527        let ln_1 = 1.0_f64.ln(); // 0.0
528        // Note: fgbio returns 0, but our MIN_PHRED is 2, so we clamp to 2
529        assert_eq!(ln_prob_to_phred(ln_1), MIN_PHRED);
530    }
531
532    /// Test log1pexp implementation (Equation 10)
533    #[test]
534    fn test_log1pexp() {
535        // Test threshold regions
536        // x <= -37: returns exp(x)
537        assert!((log1pexp(-50.0) - (-50.0_f64).exp()).abs() < 1e-10);
538        assert!((log1pexp(-37.0) - (-37.0_f64).exp().ln_1p()).abs() < 1e-10);
539
540        // x <= 18: returns log1p(exp(x))
541        assert!((log1pexp(0.0) - (1.0_f64 + 1.0).ln()).abs() < 1e-10);
542        assert!((log1pexp(10.0) - (1.0 + 10.0_f64.exp()).ln()).abs() < 1e-10);
543
544        // x <= 33.3: returns x + exp(-x)
545        assert!((log1pexp(25.0) - (25.0 + (-25.0_f64).exp())).abs() < 1e-10);
546
547        // x > 33.3: returns x
548        assert!((log1pexp(40.0) - 40.0).abs() < 1e-10);
549        assert!((log1pexp(100.0) - 100.0).abs() < 1e-10);
550    }
551
552    #[test]
553    fn test_ln_normalize() {
554        // Normalizing ln(0.2) by ln(0.5) should give ln(0.4)
555        let result = ln_normalize(0.2_f64.ln(), 0.5_f64.ln());
556        assert!((result - 0.4_f64.ln()).abs() < 1e-10);
557    }
558
559    // =====================================================================
560    // Additional edge case tests
561    // =====================================================================
562
563    #[test]
564    fn test_phred_boundary_values() {
565        // Test boundary Phred scores
566        // Q0 = 100% error (probability = 1.0)
567        let q0_error = phred_to_ln_error_prob(0);
568        assert!((q0_error - 0.0).abs() < 1e-10); // ln(1.0) = 0
569
570        // Q2 = MIN_PHRED
571        let q2_error = phred_to_ln_error_prob(MIN_PHRED);
572        let expected_q2 = 10.0_f64.powf(-0.2); // 10^(-2/10) ≈ 0.631
573        assert!((q2_error.exp() - expected_q2).abs() < 1e-6);
574
575        // Q93 = MAX_PHRED
576        let q93_error = phred_to_ln_error_prob(MAX_PHRED);
577        let expected_q93 = 10.0_f64.powf(-9.3); // 10^(-93/10) ≈ 5e-10
578        assert!((q93_error.exp() - expected_q93).abs() < 1e-15);
579    }
580
581    #[test]
582    fn test_phred_to_ln_correct_boundary() {
583        // Q0: error = 1.0, correct = 0.0 → ln(0) = -∞
584        let q0_correct = phred_to_ln_correct_prob(0);
585        assert!(q0_correct.is_infinite() && q0_correct < 0.0);
586
587        // Q93: error ≈ 5e-10, correct ≈ 1.0 → ln(correct) ≈ 0
588        let q93_correct = phred_to_ln_correct_prob(MAX_PHRED);
589        assert!(q93_correct < 0.0 && q93_correct.abs() < 1e-9);
590    }
591
592    #[test]
593    fn test_ln_prob_to_phred_clamping() {
594        // Very high error (low quality) should clamp to MIN_PHRED
595        let very_high_error = 0.9_f64.ln(); // 90% error
596        assert_eq!(ln_prob_to_phred(very_high_error), MIN_PHRED);
597
598        // 100% error should also clamp to MIN_PHRED
599        // ln(1.0) = 0, which gives Q = 0, clamped to MIN_PHRED
600        assert_eq!(ln_prob_to_phred(0.0), MIN_PHRED);
601
602        // Very low error should clamp to MAX_PHRED
603        let very_low_error = 1e-15_f64.ln();
604        assert_eq!(ln_prob_to_phred(very_low_error), MAX_PHRED);
605    }
606
607    #[test]
608    fn test_ln_sum_exp_array_edge_cases() {
609        // Empty array
610        let result = ln_sum_exp_array(&[]);
611        assert!(result.is_infinite() && result < 0.0);
612
613        // Single element
614        let result = ln_sum_exp_array(&[0.5_f64.ln()]);
615        assert!((result - 0.5_f64.ln()).abs() < 1e-10);
616
617        // Two elements (should match ln_sum_exp)
618        let a = 0.2_f64.ln();
619        let b = 0.3_f64.ln();
620        let array_result = ln_sum_exp_array(&[a, b]);
621        let pair_result = ln_sum_exp(a, b);
622        assert!((array_result - pair_result).abs() < 1e-10);
623
624        // All negative infinity
625        let result = ln_sum_exp_array(&[f64::NEG_INFINITY, f64::NEG_INFINITY]);
626        assert!(result.is_infinite() && result < 0.0);
627    }
628
629    #[test]
630    fn test_ln_sum_exp_very_small_values() {
631        // Very small probabilities (should not underflow)
632        let a = 1e-300_f64.ln();
633        let b = 2e-300_f64.ln();
634        let result = ln_sum_exp(a, b);
635        let expected = 3e-300_f64.ln();
636        assert!((result - expected).abs() < 1.0); // Large tolerance for extreme values
637    }
638
639    #[test]
640    fn test_ln_not_edge_cases() {
641        // ln_not(ln(0)) = ln(1 - 0) = ln(1) = 0
642        let result = ln_not(f64::NEG_INFINITY);
643        assert!((result - 0.0).abs() < 1e-10);
644
645        // ln_not(ln(1)) = ln(1 - 1) = ln(0) = -∞
646        let result = ln_not(0.0);
647        assert!(result.is_infinite() && result < 0.0);
648
649        // ln_not(ln(0.5)) = ln(0.5)
650        let result = ln_not(0.5_f64.ln());
651        assert!((result - 0.5_f64.ln()).abs() < 1e-10);
652
653        // Positive value (invalid probability) should return -∞
654        let result = ln_not(1.0);
655        assert!(result.is_infinite() && result < 0.0);
656    }
657
658    #[test]
659    fn test_error_two_trials_quick_approximation() {
660        // When one probability is much smaller (>6 in log space), use quick approximation
661        let large_error = 0.5_f64.ln(); // ln(0.5) ≈ -0.693
662        let small_error = 1e-6_f64.ln(); // ln(1e-6) ≈ -13.8
663
664        let result = ln_error_prob_two_trials(large_error, small_error);
665        // Should approximately equal the larger error
666        assert!((result - large_error).abs() < 0.01);
667
668        // Also test with arguments swapped
669        let result2 = ln_error_prob_two_trials(small_error, large_error);
670        assert!((result2 - large_error).abs() < 0.01);
671    }
672
673    #[test]
674    fn test_log1pexp_boundary_values() {
675        // At threshold -37
676        let at_threshold = log1pexp(-37.0);
677        assert!(at_threshold > 0.0);
678        assert!(at_threshold < 1e-15);
679
680        // At threshold 18
681        let at_18 = log1pexp(18.0);
682        let expected = (1.0 + 18.0_f64.exp()).ln();
683        assert!((at_18 - expected).abs() < 1e-10);
684
685        // At threshold 33.3
686        let at_33 = log1pexp(33.3);
687        let expected = 33.3 + (-33.3_f64).exp();
688        assert!((at_33 - expected).abs() < 1e-10);
689    }
690
691    #[test]
692    fn test_constants() {
693        // Verify constants are correct
694        assert!((LN_TWO - std::f64::consts::LN_2).abs() < 1e-15);
695        assert!((LN_FOUR_THIRDS - (4.0_f64 / 3.0).ln()).abs() < 1e-15);
696        assert!(LN_ONE.abs() < 1e-15); // LN_ONE should be 0.0
697        assert_eq!(MIN_PHRED, 2);
698        assert_eq!(MAX_PHRED, 93);
699        assert_eq!(NO_CALL_BASE, b'N');
700        assert_eq!(NO_CALL_BASE_LOWER, b'n');
701    }
702}
fgumi_consensus/phred.rs

fgumi_consensus/
phred.rs