phantompurger_rs/utils.rs
1use std::collections::HashMap;
2use bustools_core::{consistent_genes::Ec2GeneMapper, io::BusFolder};
3
4/// creates a Mapper for each BusFolder in the dictionalry
5pub (crate) fn ec_mapper_dict_from_busfolders(busfolders: &HashMap<String, BusFolder>, t2g_file: &str) -> HashMap<String, Ec2GeneMapper> {
6 valmap_ref(|b| b.make_mapper(t2g_file), busfolders)
7}
8
9
10/// logsumexp trick
11pub (crate) fn logsumexp(x: &[f64]) -> f64 {
12 // getting the max, stupid f64, cant do .iter().max()
13 let c = x.iter().reduce(|a, b| if a > b { a } else { b }).unwrap();
14 let mut exp_vec: Vec<f64> = Vec::with_capacity(x.len()); // storing exp(x-c)
15 for el in x.iter() {
16 exp_vec.push((el - c).exp());
17 }
18 let z: f64 = c + exp_vec.iter().sum::<f64>().ln();
19 z
20}
21
22// Takes a Hashmap K->V and transforms all values, leaving the keys as they are
23// note that this consumes the hashmap!
24pub fn valmap<K, V, V2, F>(fun: F, the_map: HashMap<K, V>) -> HashMap<K, V2>
25where
26 F: Fn(V) -> V2,
27 K: Eq + std::hash::Hash,
28{
29 let r: HashMap<K, V2> = the_map.into_iter().map(|(k, v)| (k, fun(v))).collect();
30 r
31}
32
33/// same as `valmap`, except it doesnt consume the map. Clones the key!
34pub fn valmap_ref<K, V, V2, F>(fun: F, the_map: &HashMap<K, V>) -> HashMap<K, V2>
35where
36 F: Fn(&V) -> V2,
37 K: Eq + std::hash::Hash + Clone,
38{
39 let r: HashMap<K, V2> = the_map.iter().map(|(k, v)| (k.clone(), fun(v))).collect();
40 r
41}
42
43#[test]
44fn test_valmap() {
45 let h: HashMap<&str, usize> = vec![("A", 1), ("B", 2)].into_iter().collect();
46
47 // basics
48 let r = valmap(|x| x.to_string(), h);
49
50 assert_eq!(r.get("A").unwrap(), "1");
51 assert_eq!(r.get("B").unwrap(), "2");
52
53 // capturing some context
54 let h: HashMap<&str, usize> = vec![("A", 1), ("B", 2)].into_iter().collect();
55 let inc = 10_usize;
56 let r = valmap(|x| x + inc, h);
57
58 assert_eq!(*r.get("A").unwrap(), 11);
59 assert_eq!(*r.get("B").unwrap(), 12);
60}
61
62// fn logfactorial(x: usize) -> f64{
63// (1..(x+1)).map(|q|(q as f64).ln()).sum()
64// }
65
66// fn log_binomial_coeff(n: usize, k: usize) -> f64{
67// // this function is RIDICULOUSLY SLOW!!!
68// logfactorial(n) - logfactorial(k) - logfactorial(n-k)
69// }
70
71// pub fn binomial_loglike(x: usize, n: usize, p: f64) -> f64{
72// // for a single datapoint
73
74// //edge cases
75// if p==1.0{
76// if x==n{
77// return 0.0 // log(p=1)
78// }
79// else{
80// return 0_f64.ln() //zero prob
81// }
82// }
83// if p==0.0{
84// if x==0{
85// return 0.0 // log(p=1)
86// }
87// else{
88// return 0_f64.ln() //zero prob
89// }
90// }
91
92// let logcoeff = log_binomial_coeff(n, x);
93// (x as f64) * p.ln() + ((n-x) as f64) * (1.0-p).ln() + logcoeff
94
95// }
96
97pub (crate) fn linspace(min: f64, max: f64, n: usize) -> Vec<f64> {
98 assert!(n >= 2);
99 let delta = (max - min) / ((n - 1) as f64);
100 let mut x = Vec::with_capacity(n);
101 for i in 0..n {
102 let y = min + (i as f64) * delta;
103 x.push(y);
104 }
105 x
106}
107
108pub (crate) fn logspace(logmin: f64, logmax: f64, n: usize) -> Vec<f64> {
109 let logx = linspace(logmin, logmax, n);
110 logx.into_iter().map(|y| 10_f64.powf(y)).collect()
111}
112
113
114
115#[cfg(test)]
116mod tests {
117
118 use crate::utils::{linspace, logspace};
119
120 // use crate::utils::{logfactorial, log_binomial_coeff, binomial_loglike, linspace, logspace, logsumexp};
121 // use statrs::assert_almost_eq;
122
123 // #[test]
124 // fn test_losumexp(){
125 // assert_eq!(logsumexp(&vec![0.0]), 0.0);
126 // assert_eq!(logsumexp(&vec![10.0]), 10.0);
127 // assert_eq!(logsumexp(&vec![0.0, 0.0, 0.0]), 3_f64.ln());
128 // assert_eq!(logsumexp(&vec![1.0, 1.0, 1.0]), 3_f64.ln() + 1.0); // log[3 e]
129 // }
130
131 // #[test]
132 // fn test_logfac(){
133 // let tolerance = 0.000000001;
134 // assert_almost_eq!(logfactorial(0), 0.0, tolerance);
135 // assert_almost_eq!(logfactorial(1), 0.0, tolerance);
136 // assert_almost_eq!(logfactorial(2), 2_f64.ln(), tolerance);
137 // assert_almost_eq!(logfactorial(3), 6_f64.ln(), tolerance);
138 // }
139
140 // #[test]
141 // fn test_log_binomial_coeff(){
142 // let tolerance = 0.000000001;
143 // assert_almost_eq!(log_binomial_coeff(5, 1), 5_f64.ln(), tolerance);
144 // assert_almost_eq!(log_binomial_coeff(5, 5), 1_f64.ln(), tolerance);
145 // }
146
147 // #[test]
148 // fn test_binomial_loglike(){
149 // assert_eq!(
150 // binomial_loglike(1, 1, 0.5 ),
151 // 0.5_f64.ln()
152 // );
153
154 // // edge cases
155 // assert_eq!(
156 // binomial_loglike(1, 1, 1.0 ),
157 // 1_f64.ln()
158 // );
159 // assert_eq!(
160 // binomial_loglike(0, 1, 0.0 ),
161 // 1_f64.ln()
162 // );
163 // }
164 #[test]
165 fn test_linspace() {
166 let min = 1.0;
167 let max = 10.0;
168 let n = 10;
169 let x = linspace(min, max, n);
170 // println!("{:?}",x);
171 assert_eq!(x.len(), n);
172 assert_eq!(*x.first().unwrap(), min);
173 assert_eq!(*x.last().unwrap(), max);
174 }
175
176 #[test]
177 fn test_logspace() {
178 let logmin = 0.0;
179 let logmax = 1.0;
180 let n = 10;
181 let x = logspace(logmin, logmax, n);
182 // println!("{:?}",x);
183 assert_eq!(x.len(), n);
184 assert_eq!(*x.first().unwrap(), 1.0);
185 assert_eq!(*x.last().unwrap(), 10.0);
186 }
187
188 #[test]
189 fn test_logspace2() {
190 let logmin = -6.0;
191 let logmax = 0.0;
192 let n = 7;
193 let x = logspace(logmin, logmax, n);
194 println!("{:?}", x);
195 assert_eq!(x.len(), n);
196 assert_eq!(*x.first().unwrap(), 1e-6);
197 assert_eq!(*x.last().unwrap(), 1.0);
198 }
199}