use roots::{SimpleConvergency, find_root_brent};
use statrs::distribution::{ContinuousCDF, Normal};
use crate::{Error, ScaledType};
fn exp_n_mutated(l: f64, k: f64, r1: f64) -> f64 {
let q = r1_to_q(k, r1);
l * q
}
fn var_n_mutated(l: f64, k: f64, r1: f64, q: Option<f64>) -> Result<f64, Error> {
if r1 == 0.0 {
return Ok(0.0);
}
let q = q.unwrap_or_else(|| r1_to_q(k, r1));
let var_n = l * (1.0 - q) * (q * (2.0 * k + (2.0 / r1) - 1.0) - 2.0 * k)
+ k * (k - 1.0) * (1.0 - q).powi(2)
+ (2.0 * (1.0 - q) / (r1.powi(2))) * ((1.0 + (k - 1.0) * (1.0 - q)) * r1 - q);
if var_n < 0.0 {
Err(Error::ANIEstimationError {
message: "varN is less than 0.0".into(),
})
} else {
Ok(var_n)
}
}
fn exp_n_mutated_squared(l: f64, k: f64, p: f64) -> Result<f64, Error> {
let var_n = var_n_mutated(l, k, p, None)?;
let exp_n_squared = exp_n_mutated(l, k, p).powi(2);
Ok(var_n + exp_n_squared)
}
fn probit(p: f64) -> f64 {
Normal::new(0.0, 1.0).unwrap().inverse_cdf(p)
}
fn r1_to_q(k: f64, r1: f64) -> f64 {
1.0 - (1.0 - r1).powi(k as i32)
}
#[allow(dead_code)]
fn get_exp_probability_nothing_common(
ani_estimate: f64,
ksize: f64,
f_scaled: f64,
n_unique_kmers: f64,
) -> Result<f64, Error> {
if ani_estimate == 0.0 || ani_estimate == 1.0 {
Ok(1.0 - ani_estimate)
} else {
let exp_nmut = exp_n_mutated(n_unique_kmers, ksize, 1.0 - ani_estimate);
let mut expected_log_probability = (n_unique_kmers - exp_nmut) * (1.0 - f_scaled).ln();
if expected_log_probability.is_infinite() {
expected_log_probability = f64::NEG_INFINITY;
}
Ok(expected_log_probability.exp())
}
}
pub fn ani_from_containment(containment: f64, ksize: f64) -> f64 {
if containment == 0.0 {
0.0
} else if containment == 1.0 {
1.0
} else {
1.0 - (1.0 - containment.powf(1.0 / ksize))
}
}
pub fn ani_ci_from_containment(
containment: f64,
ksize: f64,
scaled: ScaledType,
n_unique_kmers: u64,
confidence: Option<f64>,
) -> Result<(f64, f64), Error> {
if containment == 0.0 {
return Ok((0.0, 0.0));
} else if containment == 1.0 {
return Ok((1.0, 1.0));
}
let confidence = confidence.unwrap_or(0.95);
let scaled_f64 = scaled as f64;
let f_scaled = 1.0 / scaled_f64;
let n_unique_kmers = n_unique_kmers as f64;
let alpha = 1.0 - confidence;
let z_alpha = probit(1.0 - alpha / 2.0);
let bias_factor = 1.0 - (1.0 - f_scaled).powi(n_unique_kmers as i32);
let term_1 = (1.0 - f_scaled) / (f_scaled * (n_unique_kmers).powi(3) * bias_factor.powi(2));
let term_2 = |pest: f64| {
n_unique_kmers * exp_n_mutated(n_unique_kmers, ksize, pest)
- exp_n_mutated_squared(n_unique_kmers, ksize, pest).unwrap_or(0.0)
};
let term_3 = |pest: f64| {
var_n_mutated(n_unique_kmers, ksize, pest, None).unwrap_or(0.0) / (n_unique_kmers).powi(2)
};
let var_direct = |pest: f64| term_1 * term_2(pest) + term_3(pest);
let f1 = |pest: f64| {
(1.0 - pest).powi(ksize as i32) + z_alpha * var_direct(pest).sqrt() - containment
};
let f2 = |pest: f64| {
(1.0 - pest).powi(ksize as i32) - z_alpha * var_direct(pest).sqrt() - containment
};
let mut convergency = SimpleConvergency {
eps: 1e-15,
max_iter: 1000,
};
let dist_sol1 =
find_root_brent(0.0000001, 0.9999999, &f1, &mut convergency).unwrap_or_default();
let dist_sol2 =
find_root_brent(0.0000001, 0.9999999, &f2, &mut convergency).unwrap_or_default();
Ok((1.0 - dist_sol1, 1.0 - dist_sol2))
}
#[cfg(test)]
mod tests {
use super::*;
use std::f64::EPSILON;
#[test]
fn test_containment_to_ani_zero() {
let contain = 0.0;
let ksize = 21;
let scaled = 10;
let n_unique_kmers = 100;
let confidence = Some(0.95);
let res = ani_from_containment(contain, ksize as f64);
assert_eq!(res, 0.0);
let (ci_low, ci_high) =
ani_ci_from_containment(contain, ksize as f64, scaled, n_unique_kmers, confidence)
.unwrap();
eprintln!("{}", ci_low);
eprintln!("{}", ci_high);
assert_eq!(ci_low, 0.0);
assert_eq!(ci_high, 0.0);
}
#[test]
fn test_containment_to_ani_one() {
let contain = 1.0;
let ksize = 21;
let scaled = 10;
let n_unique_kmers = 100;
let confidence = None;
let res = ani_from_containment(contain, ksize as f64);
assert_eq!(res, 1.0);
let (ci_low, ci_high) =
ani_ci_from_containment(contain, ksize as f64, scaled, n_unique_kmers, confidence)
.unwrap();
assert_eq!(ci_low, 1.0);
assert_eq!(ci_high, 1.0);
}
#[test]
fn test_containment_to_ani_scaled1() {
let contain = 0.5;
let ksize = 21;
let scaled = 1;
let n_unique_kmers = 10000;
let confidence = None;
let ani = ani_from_containment(contain, ksize as f64);
assert!((ani - 0.9675317785238916) < EPSILON);
let (ci_low, ci_high) =
ani_ci_from_containment(contain, ksize as f64, scaled, n_unique_kmers, confidence)
.unwrap();
assert!((ci_low - 0.9635213980271021) < EPSILON);
assert!((ci_high - 0.9712900870335944) < EPSILON);
}
#[test]
fn test_containment_to_ani_scaled100() {
let contain = 0.1;
let ksize = 31;
let scaled = 100;
let n_unique_kmers = 10000;
let confidence = None;
let ani = ani_from_containment(contain, ksize as f64);
assert!((ani - 0.9284145445194744) < EPSILON);
let (ci_low, ci_high) =
ani_ci_from_containment(contain, ksize as f64, scaled, n_unique_kmers, confidence)
.unwrap();
assert!((ci_low - 0.9094445232754665) < EPSILON);
assert!((ci_high - 0.9467922076143345) < EPSILON);
}
#[test]
fn test_containment_to_ani_scaled100_2() {
let contain = 0.5;
let ksize = 21;
let scaled = 100;
let n_unique_kmers = 10000;
let confidence = None;
let ani = ani_from_containment(contain, ksize as f64);
assert!((ani - 0.9675317785238916) < EPSILON);
let (ci_low, ci_high) =
ani_ci_from_containment(contain, ksize as f64, scaled, n_unique_kmers, confidence)
.unwrap();
assert!((ci_low - 0.9569003945603415) < EPSILON);
assert!((ci_high - 0.9762879360833708) < EPSILON);
}
#[test]
fn test_var_n_mutated_zero() {
let r = 0.0;
let ksize = 31;
let nkmers = 200;
let var_n_mut = var_n_mutated(nkmers as f64, ksize as f64, r, None).unwrap(); assert_eq!(var_n_mut, 0.0, "Expected variance to be 0 for r=0");
}
#[test]
fn test_var_n_mutated_value_error() {
let r = 10.0;
let ksize = 31;
let nkmers = 200;
match var_n_mutated(nkmers as f64, ksize as f64, r, None) {
Err(e) => assert_eq!(
e.to_string(),
"error while calculating ANI confidence intervals: varN is less than 0.0",
"Unexpected error message"
),
Ok(_) => panic!("Expected an error, but got Ok"),
}
}
#[test]
fn test_var_n_mutated_success() {
let r = 0.4;
let ksize = 31;
let nkmers = 200_000;
let var_n_mut = var_n_mutated(nkmers as f64, ksize as f64, r, None).unwrap(); let expected = 0.10611425440741508;
assert!(
(var_n_mut - expected).abs() < f64::EPSILON,
"Variance did not match expected value"
);
}
#[test]
fn test_r1_to_q() {
let k = 2.0;
let r1 = 0.5;
let result = r1_to_q(k, r1);
let expected = 0.75;
assert!(
(result - expected).abs() < EPSILON,
"The result of r1_to_q({}, {}) was {}, but {} was expected",
k,
r1,
result,
expected
);
}
#[test]
fn test_exp_n_mutated() {
let l = 100.0;
let k = 2.0;
let r1 = 0.5;
let expected_q = r1_to_q(k, r1);
let expected_result = l * expected_q;
let result = exp_n_mutated(l, k, r1);
assert!(
(result - expected_result).abs() < EPSILON,
"The result of exp_n_mutated({}, {}, {}) was {}, but {} was expected",
l,
k,
r1,
result,
expected_result
);
}
#[test]
fn test_get_exp_probability_nothing_common_ani_zero() {
let ani_estimate = 0.0;
let ksize = 31.0;
let f_scaled = 0.1;
let n_unique_kmers = 1000.0;
let result =
get_exp_probability_nothing_common(ani_estimate, ksize, f_scaled, n_unique_kmers)
.unwrap();
assert_eq!(
result, 1.0,
"Expected probability for ani_estimate of 0 to be 1.0"
);
}
#[test]
fn test_get_exp_probability_nothing_common_ani_one() {
let ani_estimate = 1.0;
let ksize = 31.0;
let f_scaled = 0.1;
let n_unique_kmers = 1000.0;
let result =
get_exp_probability_nothing_common(ani_estimate, ksize, f_scaled, n_unique_kmers)
.unwrap();
assert_eq!(
result, 0.0,
"Expected probability for ani_estimate of 1 to be 0.0"
);
}
#[test]
fn test_get_exp_probability_nothing_common() {
let contain = 0.1;
let ksize = 31 as f64;
let scaled = 10;
let f_scaled = 1.0 / scaled as f64;
let n_unique_kmers = 1000;
let ani = ani_from_containment(contain, ksize);
let result =
get_exp_probability_nothing_common(ani, ksize, f_scaled, n_unique_kmers as f64)
.unwrap();
assert!(
result >= 0.0 && result <= 1.0,
"The result should be a valid probability"
);
assert!((result - 0.000026561398887587855) < EPSILON);
}
}