lorikeet_genome/utils/
math_utils.rs1use ordered_float::OrderedFloat;
2use statrs::function::gamma::ln_gamma;
3use std::clone::Clone;
4use std::ops::{Add, AddAssign, Mul, Sub};
5
6use crate::utils::natural_log_utils::NaturalLogUtils;
7
8lazy_static! {
9 static ref cache: Vec<f64> = (0..((JacobianLogTable::MAX_TOLERANCE
10 / JacobianLogTable::TABLE_STEP)
11 + 1.0) as usize)
12 .into_iter()
13 .map(|k| { (1.0 + (10.0_f64).powf(-(k as f64) * JacobianLogTable::TABLE_STEP)).log10() })
14 .collect::<Vec<f64>>();
15 pub static ref LOG10_ONE_HALF: f64 = (0.5 as f64).log10();
16 pub static ref LOG10_ONE_THIRD: f64 = -((3.0 as f64).log10());
17 pub static ref LOG_ONE_THIRD: f64 = -((3.0 as f64).ln());
18 pub static ref INV_LOG_2: f64 = (1.0 as f64) / (2.0 as f64).ln();
19 static ref LOG_10: f64 = (10. as f64).ln();
20 static ref INV_LOG_10: f64 = (1.0) / *LOG_10;
21 pub static ref LOG10_E: f64 = std::f64::consts::E.log10();
22 static ref ROOT_TWO_PI: f64 = (2.0 * std::f64::consts::PI).sqrt();
23}
24
25pub struct MathUtils {}
26
27impl MathUtils {
28 pub const LOG10_P_OF_ZERO: f64 = -1000000.0;
29
30 pub fn median_clone<T: PartialOrd + Copy>(numbers: &[T]) -> T {
35 let mut numbers = numbers.to_vec();
36 numbers.sort_by(|a, b| a.partial_cmp(b).unwrap());
37 let mid = numbers.len() / 2;
38 numbers[mid]
39 }
40
41 pub fn median<T: Ord + PartialOrd + Copy>(numbers: &mut [T]) -> T {
42 numbers.sort();
43 let mid = numbers.len() / 2;
44 numbers[mid]
45 }
46
47 pub fn normalize_pls(pls: &[f64]) -> Vec<f64> {
48 let mut new_pls = vec![0.0; pls.len()];
49 let smallest = *pls
50 .iter()
51 .min_by_key(|x| OrderedFloat(**x))
52 .unwrap_or(&std::f64::NAN);
53 new_pls.iter_mut().enumerate().for_each(|(i, pl)| {
54 *pl = pls[i] - smallest;
55 });
56
57 return new_pls;
58 }
59
60 pub fn ebe_add_in_place<T: Send + Sync + Add + Copy + AddAssign>(a: &mut [T], b: &[T]) {
64 a.iter_mut().enumerate().for_each(|(i, val)| *val += b[i]);
65 }
66
67 pub fn ebe_add<T: Send + Sync + Add + Copy + Add<Output = T>>(a: &[T], b: &[T]) -> Vec<T> {
71 let z = a
72 .iter()
73 .zip(b.iter())
74 .map(|(aval, bval)| *aval + *bval)
75 .collect::<Vec<T>>();
76 z
77 }
78
79 pub fn ebe_subtract<T: Send + Sync + Sub + Copy + Sub<Output = T>>(a: &[T], b: &[T]) -> Vec<T> {
83 let z = a
84 .iter()
85 .zip(b.iter())
86 .map(|(aval, bval)| *aval - *bval)
87 .collect::<Vec<T>>();
88 z
89 }
90
91 pub fn ebe_multiply<T: Send + Sync + Mul + Copy + Mul<Output = T>>(a: &[T], b: &[T]) -> Vec<T> {
95 let z = a
96 .into_iter()
97 .zip(b.iter())
98 .map(|(aval, bval)| *aval * *bval)
99 .collect::<Vec<T>>();
100 z
101 }
102
103 pub fn dot_product<
107 T: Send + Sync + Mul + Add + Copy + Mul<Output = T> + Add<Output = T> + std::iter::Sum,
108 >(
109 a: &[T],
110 b: &[T],
111 ) -> T {
112 Self::ebe_multiply(a, b).into_iter().sum::<T>()
113 }
114
115 pub fn log_to_log10(ln: f64) -> f64 {
121 ln * *LOG10_E
122 }
123
124 pub fn log10_binomial_coeffecient(n: f64, k: f64) -> f64 {
128 return MathUtils::log10_factorial(n)
129 - MathUtils::log10_factorial(k)
130 - MathUtils::log10_factorial(n - k);
131 }
132
133 pub fn log10_factorial(n: f64) -> f64 {
134 ln_gamma(n + 1.0) * *LOG10_E
135 }
136
137 pub fn max_element_index(array: &[f64], start: usize, finish: usize) -> usize {
142 let mut max_i = start;
143 for i in (start + 1)..finish {
144 if array[i] > array[max_i] {
145 max_i = i;
146 }
147 }
148
149 return max_i;
150 }
151
152 pub fn normalize_log10(mut array: Vec<f64>, take_log10_of_output: bool) -> Vec<f64> {
153 let log10_sum = MathUtils::log10_sum_log10(&array, 0, array.len());
154 array.iter_mut().for_each(|x| *x = *x - log10_sum);
155 if !take_log10_of_output {
156 array.iter_mut().for_each(|x| *x = 10.0_f64.powf(*x))
157 }
158 return array;
159 }
160
161 pub fn log10_sum_log10(log10_values: &[f64], start: usize, finish: usize) -> f64 {
162 if start >= finish {
163 return std::f64::NEG_INFINITY;
164 }
165
166 let max_element_index = MathUtils::max_element_index(log10_values, start, finish);
167
168 let max_value = log10_values[max_element_index];
169
170 if max_value == std::f64::NEG_INFINITY {
171 return max_value;
172 }
173
174 let sum_tot = 1.0
175 + log10_values[start..finish]
176 .iter()
177 .enumerate()
178 .filter(|(index, value)| {
179 *index != max_element_index && **value != std::f64::NEG_INFINITY
180 })
181 .map(|(_, value)| {
182 let scaled_val = value - max_value;
183 10.0_f64.powf(scaled_val)
184 })
185 .sum::<f64>();
186
187 if sum_tot.is_nan() || sum_tot == std::f64::INFINITY {
188 panic!("log10 p: Values must be non-infinite and non-NAN")
189 }
190
191 max_value
192 + (if (sum_tot - 1.0).abs() > f64::EPSILON {
193 sum_tot.log10()
194 } else {
195 0.0
196 })
197 }
198
199 pub fn log10_sum_log10_two_values(a: f64, b: f64) -> f64 {
200 if a > b {
201 a + (1. + 10.0_f64.powf(b - a)).log10()
202 } else {
203 b + (1. + 10.0_f64.powf(a - b)).log10()
204 }
205 }
206
207 pub fn log10_sum_log10_three_values(a: f64, b: f64, c: f64) -> f64 {
215 if a >= b && a >= c {
216 a + (1.0 + 10.0_f64.powf(b - a) + 10.0_f64.powf(c - a)).log10()
217 } else if b >= c {
218 b + (1.0 + 10.0_f64.powf(a - b) + 10.0_f64.powf(c - b)).log10()
219 } else {
220 c + (1.0 + 10.0_f64.powf(a - c) + 10.0_f64.powf(b - c)).log10()
221 }
222 }
223
224 pub fn scale_log_space_array_for_numeric_stability(array: &[f64]) -> Vec<f64> {
233 let max_value: f64 = *array
234 .iter()
235 .max_by_key(|x| OrderedFloat(**x))
236 .unwrap_or(&std::f64::NAN);
237 let result = array.iter().map(|x| *x - max_value).collect::<Vec<f64>>();
238 result
239 }
240
241 pub fn normalize_from_log10(
252 array: &[f64],
253 take_log10_of_output: bool,
254 keep_in_log_space: bool,
255 ) -> Vec<f64> {
256 let max_value: f64 = *array
259 .iter()
260 .max_by_key(|x| OrderedFloat(**x))
261 .unwrap_or(&std::f64::NAN);
262
263 if keep_in_log_space {
265 let array: Vec<f64> = array.iter().map(|x| *x - max_value).collect::<Vec<f64>>();
266 return array;
267 }
268 let mut normalized: Vec<f64> = (0..array.len())
270 .into_iter()
271 .map(|i| 10.0_f64.powf(array[i] - max_value))
272 .collect::<Vec<f64>>();
273
274 let sum: f64 = normalized.iter().sum::<f64>();
275
276 normalized.iter_mut().enumerate().for_each(|(i, x)| {
277 *x = *x / sum;
278 if take_log10_of_output {
279 *x = x.log10();
280 if *x < MathUtils::LOG10_P_OF_ZERO || x.is_infinite() {
281 *x = array[i] - max_value
282 }
283 }
284 });
285
286 normalized
287 }
288
289 pub fn is_valid_log10_probability(result: f64) -> bool {
290 result <= 0.0
291 }
292
293 pub fn log10_to_log(log10: f64) -> f64 {
294 log10 * (*LOG_10)
295 }
296 pub fn log10_one_minus_pow10(a: f64) -> f64 {
303 if a > 0.0 {
304 return std::f64::NAN;
305 }
306 if a == 0.0 {
307 return std::f64::NEG_INFINITY;
308 }
309
310 let b = a * *LOG_10;
311 return NaturalLogUtils::log1mexp(b) * *INV_LOG_10;
312 }
313
314 pub fn approximate_log10_sum_log10(a: f64, b: f64) -> f64 {
315 if a > b {
317 MathUtils::approximate_log10_sum_log10(b, a)
318 } else if a == std::f64::NEG_INFINITY {
319 b
320 } else {
321 let diff = b - a;
325
326 b + if diff < JacobianLogTable::MAX_TOLERANCE {
327 JacobianLogTable::get(diff)
328 } else {
329 0.0
330 }
331 }
332 }
333
334 pub fn approximate_log10_sum_log10_vec(
345 vals: &[f64],
346 from_index: usize,
347 to_index: usize,
348 ) -> f64 {
349 if from_index == to_index {
350 return std::f64::NEG_INFINITY;
351 };
352 let max_element_index = Self::max_element_index(vals, from_index, to_index);
353 let mut approx_sum = vals[max_element_index];
354
355 let mut i = from_index;
356 for val in vals[from_index..to_index].iter() {
357 if i == max_element_index || val == &std::f64::NEG_INFINITY {
358 i += 1;
359 continue;
360 };
361 let diff = approx_sum - val;
362 if diff < JacobianLogTable::MAX_TOLERANCE {
363 approx_sum += JacobianLogTable::get(diff);
364 };
365
366 i += 1;
367 }
368
369 return approx_sum;
370 }
371
372 pub fn well_formed_f64(val: f64) -> bool {
373 return !val.is_nan() && !val.is_infinite();
374 }
375
376 pub fn normal_distribution(mean: f64, sd: f64, x: f64) -> f64 {
384 assert!(sd >= 0.0, "Standard deviation must be >= 0.0");
385 return (-(x - mean) * (x - mean) / (2.0 * sd * sd)).exp() / (sd * *ROOT_TWO_PI);
391 }
392
393 pub fn normalize_sum_to_one(mut array: Vec<f64>) -> Vec<f64> {
403 if array.len() == 0 {
404 return array;
405 }
406
407 let sum = array.iter().sum::<f64>();
408 assert!(
409 sum >= 0.0,
410 "Values in probability array sum to a negative number"
411 );
412 array.iter_mut().for_each(|x| *x = *x / sum);
413
414 return array;
415 }
416
417 pub fn fast_bernoulli_entropy(p: f64) -> f64 {
424 let product = p * (1.0 - p);
425 return product * ((11.0 + 33.0 * product) / (2.0 + 20.0 * product));
426 }
427
428 pub fn is_valid_probability(result: f64) -> bool {
429 return result >= 0.0 && result <= 1.0;
430 }
431}
432
433#[derive(Debug, Clone, Copy)]
434pub struct RunningAverage {
435 mean: f64,
436 s: f64,
437 obs_count: usize,
438}
439
440impl RunningAverage {
441 pub fn new() -> RunningAverage {
442 RunningAverage {
443 mean: 0.0,
444 s: 0.0,
445 obs_count: 0,
446 }
447 }
448
449 pub fn add(&mut self, obs: f64) {
450 self.obs_count += 1;
451 let old_mean = self.mean;
452 self.mean += (obs - self.mean) / self.obs_count as f64;
453 self.s += (obs - old_mean) * (obs - self.mean)
454 }
455
456 pub fn add_all(&mut self, col: &[f64]) {
457 for obs in col {
458 self.add(*obs)
459 }
460 }
461
462 pub fn mean(&self) -> f64 {
463 self.mean
464 }
465
466 pub fn stddev(&self) -> f64 {
467 (self.s / (self.obs_count - 1) as f64).sqrt()
468 }
469
470 pub fn var(&self) -> f64 {
471 self.s / (self.obs_count - 1) as f64
472 }
473
474 pub fn obs_count(&self) -> usize {
475 self.obs_count
476 }
477}
478
479struct JacobianLogTable {}
483
484impl JacobianLogTable {
485 pub const MAX_TOLERANCE: f64 = 8.0;
488
489 pub const TABLE_STEP: f64 = 0.0001;
493 pub const INV_STEP: f64 = (1.0) / JacobianLogTable::TABLE_STEP;
494
495 pub fn get(difference: f64) -> f64 {
496 let index = (difference * JacobianLogTable::INV_STEP).round() as usize;
497 return cache[index];
498 }
499
500 }