stats_utils/
lib.rs

1// Copyright (c) 2018 10X Genomics, Inc. All rights reserved.
2
3// Compute some stats.
4// ◼ Presumably much of this is available elsewhere.
5
6use std::f64;
7
8// Compute N50 of some numbers.
9
10pub fn n50(v: &[i32]) -> i32 {
11    if v.is_empty() {
12        return 0;
13    }
14    for n in v.iter() {
15        assert!(*n > 0);
16    }
17    let mut sum: i64 = 0;
18    let mut half: i64 = 0;
19    for n in v.iter() {
20        sum += i64::from(*n);
21    }
22    let mut vs = v.to_owned();
23    vs.sort_unstable();
24    for i in 0..vs.len() {
25        half += i64::from(vs[i]);
26        if 2 * half == sum && i < vs.len() - 1 {
27            return (vs[i] + vs[i + 1]) / 2;
28        }
29        if 2 * half >= sum {
30            return vs[i];
31        }
32    }
33    0 // never executed
34}
35
36pub fn n90(v: &[i32]) -> i32 {
37    if v.is_empty() {
38        return 0;
39    }
40    for n in v.iter() {
41        assert!(*n > 0);
42    }
43    let mut sum: i64 = 0;
44    let mut part: i64 = 0;
45    for n in v.iter() {
46        sum += i64::from(*n);
47    }
48    let mut vs = v.to_owned();
49    vs.sort_unstable();
50    for i in 0..vs.len() {
51        part += i64::from(vs[i]);
52        if 10 * (part as i64) == 9 * sum as i64 && i < vs.len() - 1 {
53            return (vs[i] + vs[i + 1]) / 2;
54        }
55        if part as f64 / sum as f64 >= 0.9_f64 {
56            return vs[i];
57        }
58    }
59    0 // never executed
60}
61
62// Compute mean of some numbers, returning zero on empty vector.
63
64pub fn mean(v: &[i32]) -> f64 {
65    let sum1 = v.len() as f64;
66    let mut sum2 = 0_f64;
67    for x in v.iter() {
68        sum2 += f64::from(*x);
69    }
70    if sum1 == 0_f64 {
71        return 0_f64;
72    }
73    sum2 / sum1
74}
75
76// Compute "length-weighted" mean of some numbers.  Normally this would be applied
77// to positive numbers.  Returns 0 if the sum of the numbers is zero.
78// ◼ Not sure what the right term for this is.  The word "length" is confusing.
79
80pub fn len_weighted_mean(v: &[i32]) -> f64 {
81    let mut sum1 = 0_f64;
82    let mut sum2 = 0_f64;
83    for x in v.iter() {
84        sum1 += f64::from(*x);
85        sum2 += f64::from(*x) * f64::from(*x);
86    }
87    if sum1 == 0_f64 {
88        return 0_f64;
89    }
90    sum2 / sum1
91}
92
93// Compute %CV of some numbers, returning zero on an empty vector.
94// Divides by n, not n-1.
95
96pub fn cv(v: &[f64]) -> f64 {
97    if v.is_empty() {
98        return 0_f64;
99    }
100    let n = v.len() as f64;
101    let mut mean = 0_f64;
102    for x in v {
103        mean += x;
104    }
105    mean /= n;
106    let mut x = 0_f64;
107    for y in v {
108        x += (y - mean) * (y - mean);
109    }
110    x = (x / n).sqrt();
111    100.0_f64 * x / mean
112}
113
114// Compute absolute value of difference between two numbers.
115
116pub fn abs_diff(a: usize, b: usize) -> usize {
117    if a <= b {
118        return b - a;
119    }
120    a - b
121}
122
123pub fn abs_diff_f64(a: f64, b: f64) -> f64 {
124    if a <= b {
125        return b - a;
126    }
127    a - b
128}
129
130// Compute percent ratio.
131
132pub fn percent_ratio(a: usize, b: usize) -> f64 {
133    100_f64 * a as f64 / b as f64
134}
135
136// Code to make a random vector.  This uses a hardcoded pseudo-random number
137// generator so that the behavior can be exactly reproduced in another language.
138
139pub fn make_random_vec(x: &mut Vec<i64>, n: usize) {
140    x.resize(n, 0);
141    x[0] = 0;
142    for i in 1..x.len() {
143        x[i] = 6_364_136_223_846_793_005i64
144            .wrapping_mul(x[i - 1])
145            .wrapping_add(1_442_695_040_888_963_407);
146    }
147}
148
149// binomial_sum( n, k, p ): return sum_{i=0..k} choose(n,i) * p^i * (1-p)^(n-i)
150//
151// No attempt has been made to make this efficient or to pay attention to
152// accuracy or overflow problems.
153
154pub fn binomial_sum(n: usize, k: usize, p: f64) -> f64 {
155    assert!(n >= 1);
156    assert!(k <= n);
157    let mut sum = 0.0;
158    let mut choose = 1.0;
159    for _ in 0..n {
160        choose *= 1.0 - p;
161    }
162    let q = p / (1.0 - p);
163    for i in 0..=k {
164        sum += choose;
165        choose *= (n - i) as f64;
166        choose /= (i + 1) as f64;
167        choose *= q;
168    }
169    sum
170}