use hashbrown::{HashMap, HashSet};
use ndarray::Array1;
use crate::robust_rank::sort_array;
#[must_use]
pub fn argsort<A: PartialOrd>(array: &Array1<A>) -> Array1<usize> {
let mut order = Array1::range(0., array.len() as f64, 1.)
.iter()
.map(|x| *x as usize)
.collect::<Vec<usize>>();
order.sort_by(|a, b| array[*a].partial_cmp(&array[*b]).unwrap());
Array1::from_vec(order)
}
#[must_use]
pub fn rank<A: PartialOrd>(array: &Array1<A>) -> Array1<usize> {
let order = argsort(array);
argsort(&order)
}
#[must_use]
pub fn normed_ranks(array: &Array1<f64>) -> Array1<f64> {
rank(array)
.iter()
.map(|x| (x + 1) as f64)
.map(|x| x / array.len() as f64)
.collect()
}
#[must_use]
pub fn group_sizes(array: &[usize]) -> Vec<usize> {
let size_map = array.iter().fold(HashMap::new(), |mut map, x| {
*map.entry(*x).or_insert(0) += 1usize;
map
});
size_map
.values()
.fold(HashSet::new(), |mut set, x| {
set.insert(*x);
set
})
.iter()
.copied()
.collect()
}
#[must_use]
pub fn filter_alpha(array: &Array1<f64>, alpha: f64) -> Array1<f64> {
array.iter().filter(|x| **x < alpha).copied().collect()
}
pub fn filter_ranks(array: &Array1<f64>, rank: usize) -> Array1<f64> {
let sorted_array = sort_array(array);
sorted_array.iter().take(rank).copied().collect()
}
#[must_use]
pub fn empirical_cdf(obs: f64, null: &Array1<f64>) -> f64 {
let size = null.len();
let count = null.iter().filter(|x| **x <= obs).count();
(count + 1) as f64 / (size + 1) as f64
}
#[must_use]
pub fn encode_index(genes: &[String]) -> (HashMap<usize, String>, Vec<usize>) {
let mut total = 0usize;
let mut map = HashMap::with_capacity(genes.len());
let mut encoding = Vec::with_capacity(genes.len());
for g in genes {
if let Some(e) = map.get(g) {
encoding.push(*e);
} else {
map.insert(g, total);
encoding.push(total);
total += 1;
}
}
(
map.iter().map(|(k, v)| (*v, (*k).clone())).collect(),
encoding,
)
}
#[must_use]
pub fn recode_index(n_genes: usize, map: &HashMap<usize, String>) -> Vec<String> {
(0..n_genes)
.map(|x| map.get(&x).expect("Unexpected missing index").clone())
.collect()
}
#[must_use]
pub fn select_ranks(current_idx: usize, encodings: &[usize], ranks: &Array1<f64>) -> Array1<f64> {
encodings
.iter()
.zip(ranks.iter())
.filter(|(idx, _ranks)| **idx == current_idx)
.map(|(_, ranks)| *ranks)
.collect()
}
#[cfg(test)]
mod testing {
use super::{argsort, empirical_cdf, filter_alpha, group_sizes, normed_ranks, rank};
use super::{encode_index, select_ranks};
use ndarray::{array, Array1};
use ndarray_rand::rand_distr::Uniform;
use ndarray_rand::RandomExt;
use statrs::statistics::Statistics;
#[test]
fn test_encoding() {
let names = ["g.0", "g.1", "g.0", "g.2"]
.iter()
.map(|x| (*x).to_string())
.collect::<Vec<String>>();
let (encode_map, encoding) = encode_index(&names);
assert_eq!(encoding, vec![0, 1, 0, 2]);
assert_eq!(encode_map.get(&0), Some(&"g.0".to_string()));
assert_eq!(encode_map.get(&1), Some(&"g.1".to_string()));
assert_eq!(encode_map.get(&2), Some(&"g.2".to_string()));
assert_eq!(encode_map.len(), 3);
}
#[test]
fn test_selection() {
let indices = vec![0, 1, 2, 0, 1, 2];
let scores = array![0.5, 0., 0., 0.5, 0., 0.];
let selection = select_ranks(0, &indices, &scores);
assert_eq!(selection, array![0.5, 0.5]);
}
#[test]
fn test_argsort() {
let floats = array![0.3, 0.2, 0.1, 0.4];
let order = argsort(&floats);
assert_eq!(order, array![2, 1, 0, 3]);
}
#[test]
fn test_argsort_precision() {
let floats = array![3e-300, 2e-300, 1e-300, 4e-300];
let order = argsort(&floats);
assert_eq!(order, array![2, 1, 0, 3]);
}
#[test]
fn test_ranks() {
let floats = array![0.3, 0.2, 0.1, 0.4];
let ranks = rank(&floats);
assert_eq!(ranks, array![2, 1, 0, 3]);
}
#[test]
fn test_normed_ranks() {
let floats = array![0.3, 0.2, 0.1, 0.4];
let truth = (array![2., 1., 0., 3.] + 1.) / 4.;
let nranks = normed_ranks(&floats);
assert_eq!(nranks, truth);
}
#[test]
fn test_group_sizes() {
let groups = vec![0, 0, 1, 1, 1, 2];
let truth = [2, 3, 1];
let sizes = group_sizes(&groups);
truth.iter().for_each(|x| assert!(sizes.contains(x)));
assert_eq!(sizes.len(), 3);
}
#[test]
fn test_filter_alpha() {
for _ in 0..100 {
let num_samples = 5;
let normed_ranks = Array1::random((num_samples,), Uniform::new(0., 1.));
let alpha = 0.3;
let filtered = filter_alpha(&normed_ranks, alpha);
if !filtered.is_empty() {
assert!(filtered.max() < alpha);
}
}
}
#[test]
fn test_empirical_cdf() {
let obs = 0.3;
let null = Array1::range(0., 1., 0.01);
let cdf = empirical_cdf(obs, &null);
assert_eq!(cdf, 0.31683168316831684);
}
#[test]
fn test_recoding() {
let names = ["g.0", "g.1", "g.0", "g.2"]
.iter()
.map(|x| (*x).to_string())
.collect::<Vec<String>>();
let (encode_map, _encoding) = encode_index(&names);
let recoded = super::recode_index(3, &encode_map);
assert_eq!(recoded, vec!["g.0", "g.1", "g.2"]);
}
}