pub mod adaptive_integration;
pub mod cdf;
pub mod errors;
use std::convert::TryFrom;
use std::f64;
use std::iter;
use std::mem;
use std::ops::{Add, AddAssign, Div, Mul, Sub, SubAssign};
use itertools::Itertools;
use itertools_num::linspace;
use num_traits::{Float, Zero};
use ordered_float::{FloatIsNan, NotNan};
use crate::utils::FastExp;
pub use self::errors::{Error, Result};
const LOG_TO_PHRED_FACTOR: f64 = -4.342_944_819_032_517_5;
const PHRED_TO_LOG_FACTOR: f64 = -0.230_258_509_299_404_56;
fn ln_1m_exp(p: f64) -> f64 {
assert!(p <= 0.0);
if p < -0.693 {
(-p.fastexp()).ln_1p()
} else {
(-p.exp_m1()).ln()
}
}
custom_derive! {
#[derive(
NewtypeFrom,
NewtypeDeref,
NewtypeAdd(*),
NewtypeSub(*),
NewtypeMul(*),
NewtypeDiv(*),
Default,
Copy,
Clone,
PartialEq,
PartialOrd,
Debug,
)]
#[derive(serde::Serialize, serde::Deserialize)]
pub struct Prob(pub f64);
}
impl Prob {
pub fn checked(p: f64) -> Result<Self> {
if (0.0..=1.0).contains(&p) {
Ok(Prob(p))
} else {
Err(Error::InvalidProb { prob: p })
}
}
}
custom_derive! {
#[derive(
NewtypeFrom,
NewtypeDeref,
NewtypeAdd(*),
NewtypeSub(*),
Copy,
Clone,
PartialEq,
PartialOrd,
Debug,
)]
#[derive(serde::Serialize, serde::Deserialize)]
pub struct LogProb(pub f64);
}
custom_derive! {
#[derive(
NewtypeFrom,
NewtypeDeref,
NewtypeAdd(*),
NewtypeSub(*),
Copy,
Clone,
PartialEq,
PartialOrd,
Debug,
)]
#[derive(serde::Serialize, serde::Deserialize)]
pub struct PHREDProb(pub f64);
}
pub type ScanIter<I> = iter::Scan<
<I as IntoIterator>::IntoIter,
LogProb,
fn(&mut LogProb, LogProb) -> Option<LogProb>,
>;
static LOGPROB_LN_ZERO: LogProb = LogProb(f64::NEG_INFINITY);
static LOGPROB_LN_ONE: LogProb = LogProb(0.0);
impl LogProb {
pub fn is_valid(&self) -> bool {
!self.is_nan() && **self <= 0.0
}
#[inline]
pub fn ln_zero() -> LogProb {
LOGPROB_LN_ZERO
}
#[inline]
pub fn ln_one() -> LogProb {
LOGPROB_LN_ONE
}
pub fn cap_numerical_overshoot(&self, epsilon: f64) -> LogProb {
if *self <= LogProb::ln_one() {
*self
} else {
let capped = **self - epsilon;
if capped <= 0.0 {
LogProb::ln_one()
} else {
panic!(
"Cannot correct LogProb {:?} -- not within given epsilon of 0.0 ({})",
**self, epsilon
);
}
}
}
pub fn ln_one_minus_exp(&self) -> LogProb {
LogProb(ln_1m_exp(**self))
}
pub fn ln_sum_exp(probs: &[LogProb]) -> LogProb {
if probs.is_empty() {
Self::ln_zero()
} else {
let mut pmax = probs[0];
let mut imax = 0;
for (i, &p) in probs.iter().enumerate().skip(1) {
if p > pmax {
pmax = p;
imax = i;
}
}
if pmax == Self::ln_zero() {
Self::ln_zero()
} else if *pmax == f64::INFINITY {
LogProb(f64::INFINITY)
} else {
pmax + LogProb(
(probs
.iter()
.enumerate()
.filter_map(|(i, p)| {
if i == imax || *p == Self::ln_zero() {
None
} else {
Some((p - pmax).fastexp())
}
})
.sum::<f64>())
.ln_1p(),
)
}
}
}
pub fn ln_add_exp(self, other: LogProb) -> LogProb {
if other == Self::ln_zero() {
self
} else {
let (mut p0, mut p1) = (self, other);
if p1 > p0 {
mem::swap(&mut p0, &mut p1);
}
if p0 == Self::ln_zero() {
Self::ln_zero()
} else if *p0 == f64::INFINITY {
LogProb(f64::INFINITY)
} else {
p0 + LogProb((p1 - p0).fastexp().ln_1p())
}
}
}
pub fn ln_sub_exp(self, other: LogProb) -> LogProb {
if other == Self::ln_zero() {
self
} else {
let (p0, p1) = (self, other);
assert!(
p0 >= p1,
"Subtraction would lead to negative probability, which is undefined in log space."
);
if *p1 == f64::NEG_INFINITY {
p0
} else if relative_eq!(*p0, *p1) || p0 == Self::ln_zero() {
Self::ln_zero()
} else if *p0 == f64::INFINITY {
LogProb(f64::INFINITY)
} else {
p0 + (p1 - p0).ln_one_minus_exp()
}
}
}
pub fn ln_cumsum_exp<I: IntoIterator<Item = LogProb>>(probs: I) -> ScanIter<I> {
probs
.into_iter()
.scan(Self::ln_zero(), Self::scan_ln_add_exp)
}
pub fn ln_trapezoidal_integrate_exp<T, D>(mut density: D, a: T, b: T, n: usize) -> LogProb
where
T: Copy + Add<Output = T> + Sub<Output = T> + Div<Output = T> + Mul<Output = T> + Float,
D: FnMut(usize, T) -> LogProb,
f64: From<T>,
{
let mut probs = linspace(a, b, n)
.enumerate()
.dropping(1)
.dropping_back(1)
.map(|(i, v)| LogProb(*density(i, v) + 2.0f64.ln()))
.collect_vec();
probs.push(density(0, a));
probs.push(density(n, b));
let width = f64::from(b - a);
LogProb(*Self::ln_sum_exp(&probs) + width.ln() - (2.0 * (n - 1) as f64).ln())
}
pub fn ln_simpsons_integrate_exp<T, D>(mut density: D, a: T, b: T, n: usize) -> LogProb
where
T: Copy + Add<Output = T> + Sub<Output = T> + Div<Output = T> + Mul<Output = T> + Float,
D: FnMut(usize, T) -> LogProb,
f64: From<T>,
{
assert_eq!(n % 2, 1, "n must be odd");
let mut probs = linspace(a, b, n)
.enumerate()
.dropping(1)
.dropping_back(1)
.map(|(i, v)| {
let weight = (2 + (i % 2) * 2) as f64;
LogProb(*density(i, v) + weight.ln()) })
.collect_vec();
probs.push(density(0, a));
probs.push(density(n, b));
let width = f64::from(b - a);
LogProb(*Self::ln_sum_exp(&probs) + width.ln() - ((n - 1) as f64).ln() - 3.0f64.ln())
}
pub fn ln_trapezoidal_integrate_grid_exp<T, D>(mut density: D, grid: &[T]) -> LogProb
where
T: Copy + Add<Output = T> + Sub<Output = T> + Div<Output = T> + Mul<Output = T> + Float,
D: FnMut(usize, T) -> LogProb,
f64: From<T>,
{
let probs = grid
.iter()
.enumerate()
.dropping(1)
.map(|(i, v)| {
LogProb(
*(density(i - 1, grid[i - 1]).ln_add_exp(density(i, *v))) - 2.0f64.ln()
+ f64::from(*v - grid[i - 1]).ln(),
)
})
.collect_vec();
LogProb::ln_sum_exp(&probs)
}
fn scan_ln_add_exp(s: &mut LogProb, p: LogProb) -> Option<LogProb> {
*s = s.ln_add_exp(p);
Some(*s)
}
}
impl<'a> iter::Sum<&'a LogProb> for LogProb {
fn sum<I: Iterator<Item = &'a LogProb>>(iter: I) -> Self {
iter.fold(LogProb(0.0), |a, b| a + *b)
}
}
impl iter::Sum<LogProb> for LogProb {
fn sum<I: Iterator<Item = LogProb>>(iter: I) -> Self {
iter.fold(LogProb(0.0), |a, b| a + b)
}
}
impl AddAssign for LogProb {
fn add_assign(&mut self, other: LogProb) {
*self = *self + other;
}
}
impl SubAssign for LogProb {
fn sub_assign(&mut self, other: LogProb) {
*self = *self - other;
}
}
impl From<NotNan<f64>> for LogProb {
fn from(p: NotNan<f64>) -> LogProb {
LogProb(*p)
}
}
impl TryFrom<LogProb> for NotNan<f64> {
type Error = FloatIsNan;
fn try_from(p: LogProb) -> Result<Self, Self::Error> {
NotNan::new(*p)
}
}
impl From<LogProb> for Prob {
fn from(p: LogProb) -> Prob {
Prob(p.fastexp())
}
}
impl From<PHREDProb> for Prob {
fn from(p: PHREDProb) -> Prob {
Prob(10.0f64.powf(-*p / 10.0))
}
}
impl From<Prob> for LogProb {
fn from(p: Prob) -> LogProb {
LogProb(p.ln())
}
}
impl From<PHREDProb> for LogProb {
fn from(p: PHREDProb) -> LogProb {
LogProb(*p * PHRED_TO_LOG_FACTOR)
}
}
impl From<Prob> for PHREDProb {
fn from(p: Prob) -> PHREDProb {
PHREDProb(-10.0 * p.log10())
}
}
impl From<LogProb> for PHREDProb {
fn from(p: LogProb) -> PHREDProb {
PHREDProb(*p * LOG_TO_PHRED_FACTOR)
}
}
impl Default for LogProb {
fn default() -> LogProb {
LogProb::ln_zero()
}
}
impl Default for PHREDProb {
fn default() -> PHREDProb {
PHREDProb::from(Prob(0.0))
}
}
impl Zero for Prob {
fn zero() -> Self {
Prob(0.0)
}
fn is_zero(&self) -> bool {
**self == 0.0
}
}
impl Zero for LogProb {
fn zero() -> Self {
LogProb::ln_zero()
}
fn is_zero(&self) -> bool {
self.is_infinite() && self.signum() < 0.0
}
}
impl Zero for PHREDProb {
fn zero() -> Self {
PHREDProb::from(Prob(0.0))
}
fn is_zero(&self) -> bool {
*self == Self::zero()
}
}
#[cfg(test)]
mod tests {
use super::*;
use itertools::Itertools;
#[test]
fn test_sum() {
let probs = [LogProb::ln_zero(), LogProb::ln_one(), LogProb::ln_zero()];
assert_eq!(LogProb::ln_sum_exp(&probs), LogProb::ln_one());
}
#[test]
fn test_empty_sum() {
assert_eq!(LogProb::ln_sum_exp(&[]), LogProb::ln_zero());
}
#[test]
fn test_cumsum() {
let probs = vec![
LogProb::ln_zero(),
LogProb(0.01f64.ln()),
LogProb(0.001f64.ln()),
];
let cumsum = LogProb::ln_cumsum_exp(probs).collect_vec();
assert_relative_eq!(*cumsum[0], *LogProb::ln_zero());
assert_relative_eq!(*cumsum[1], 0.01f64.ln());
assert_relative_eq!(*cumsum[2], 0.011f64.ln(), epsilon = 0.000001);
}
#[test]
fn test_sub() {
assert_eq!(
LogProb::ln_one().ln_sub_exp(LogProb::ln_one()),
LogProb::ln_zero()
);
assert_relative_eq!(
*LogProb::ln_one().ln_sub_exp(LogProb(0.5f64.ln())),
*LogProb(0.5f64.ln()),
epsilon = 0.0000000001
);
assert_relative_eq!(
*LogProb(-1.6094379124341).ln_sub_exp(LogProb::ln_zero()),
*LogProb(-1.6094379124341)
);
}
#[test]
fn test_one_minus() {
assert_eq!(LogProb::ln_zero().ln_one_minus_exp(), LogProb::ln_one());
assert_eq!(LogProb::ln_one().ln_one_minus_exp(), LogProb::ln_zero());
}
#[test]
fn test_trapezoidal_integrate() {
let density = |_, _| LogProb(0.1f64.ln());
let prob = LogProb::ln_trapezoidal_integrate_exp(density, 0.0, 10.0, 5);
assert_relative_eq!(*prob, *LogProb::ln_one(), epsilon = 0.0000001);
}
#[test]
fn test_trapezoidal_integrate_grid() {
let grid: Vec<_> = linspace(0.0, 10.0, 5).collect();
let density = |_, _| LogProb(0.1f64.ln());
let prob = LogProb::ln_trapezoidal_integrate_grid_exp(density, &grid);
assert_relative_eq!(*prob, *LogProb::ln_one(), epsilon = 0.0000001);
}
#[test]
fn test_simpsons_integrate() {
let density = |_, _| LogProb(0.1f64.ln());
let prob = LogProb::ln_simpsons_integrate_exp(density, 0.0, 10.0, 5);
assert_relative_eq!(*prob, *LogProb::ln_one(), epsilon = 0.0000001);
}
#[test]
fn test_cap_numerical_overshoot() {
let under_top = LogProb(-0.00000005);
assert_eq!(under_top, under_top.cap_numerical_overshoot(0.0000001));
let over_top = LogProb(0.00000005);
assert_eq!(
LogProb::ln_one(),
over_top.cap_numerical_overshoot(0.0000001)
);
}
#[test]
#[should_panic]
fn test_cap_numerical_overshoot_panic() {
let over_top = LogProb(0.00000005);
over_top.cap_numerical_overshoot(0.00000001);
}
#[test]
fn test_sum_one_zero() {
assert_eq!(
LogProb::ln_one().ln_add_exp(LogProb::ln_zero()),
LogProb::ln_one()
);
}
#[test]
fn test_zero() {
assert!(LogProb::zero().is_zero());
assert!(Prob::zero().is_zero());
assert!(PHREDProb::zero().is_zero());
}
}