use crate::{utils::recode_index, ResultsRRA};
use adjustp::Procedure;
use anyhow::{bail, Result};
use getset::Getters;
use hashbrown::HashMap;
use ndarray::Array1;
use super::{
filter_alpha, group_sizes, normed_ranks,
permutations::run_permutations,
robust_rank::robust_rank_aggregation,
utils::{empirical_cdf, encode_index, select_ranks},
};
fn gene_rra(
current_idx: usize,
encodings: &[usize],
nranks: &Array1<f64>,
permutation_vectors: &HashMap<usize, Array1<f64>>,
alpha: f64,
) -> (f64, f64) {
let gene_ranks = select_ranks(current_idx, encodings, nranks);
let filtered = filter_alpha(&gene_ranks, alpha);
let score = robust_rank_aggregation(&filtered, gene_ranks.len());
(
score,
empirical_cdf(
score,
permutation_vectors
.get(&gene_ranks.len())
.expect("Unexpected missing key"),
),
)
}
fn generate_permutation_vectors(
n_genes: usize,
alpha: f64,
npermutations: usize,
sizes: &[usize],
seed: u64,
) -> HashMap<usize, Array1<f64>> {
sizes
.iter()
.map(|unique_size| {
(
*unique_size,
run_permutations(n_genes, alpha, npermutations, *unique_size, seed),
)
})
.map(|(u, v)| (u, Array1::from_vec(v)))
.collect::<HashMap<usize, Array1<f64>>>()
}
fn calculate_empirical_pvalues(
n_genes: usize,
encode: &[usize],
nranks: &Array1<f64>,
permutation_vectors: &HashMap<usize, Array1<f64>>,
alpha: f64,
) -> (Array1<f64>, Array1<f64>) {
let mut scores = Array1::zeros(n_genes);
let mut pvalues = Array1::zeros(n_genes);
(0..n_genes).for_each(|curr| {
let (score, pvalue) = gene_rra(curr, encode, nranks, permutation_vectors, alpha);
scores[curr] = score;
pvalues[curr] = pvalue;
});
(scores, pvalues)
}
#[derive(Getters)]
pub struct AlphaRRA {
#[getset(get = "pub")]
encode_map: HashMap<usize, String>,
#[getset(get = "pub")]
encode: Vec<usize>,
#[getset(get_copy = "pub")]
alpha: f64,
#[allow(dead_code)]
#[getset(get_copy = "pub")]
n_permutations: usize,
#[getset(get_copy = "pub")]
correction: Procedure,
#[getset(get_copy = "pub")]
n_genes: usize,
#[getset(get = "pub")]
permutation_vectors: HashMap<usize, Array1<f64>>,
#[allow(dead_code)]
#[getset(get_copy = "pub")]
seed: u64,
}
impl AlphaRRA {
pub fn new(
genes: &[String],
alpha: f64,
n_permutations: usize,
correction: Procedure,
seed: u64,
) -> Self {
let (encode_map, encode) = encode_index(genes);
let n_genes = encode_map.len();
let n_permutations = n_permutations * n_genes;
let permutation_vectors = generate_permutation_vectors(
n_genes,
alpha,
n_permutations,
&group_sizes(&encode),
seed,
);
Self {
encode_map,
encode,
alpha,
n_permutations,
correction,
n_genes,
permutation_vectors,
seed,
}
}
pub fn run(&self, pvalues: &Array1<f64>) -> Result<ResultsRRA> {
if pvalues.len() != self.encode.len() {
bail!("The number of p-values does not match the number of sgRNAs in the library");
}
let nranks = normed_ranks(pvalues);
let (scores, pvalues) = calculate_empirical_pvalues(
self.n_genes,
&self.encode,
&nranks,
&self.permutation_vectors,
self.alpha,
);
let names = recode_index(self.n_genes, &self.encode_map);
let result = ResultsRRA::new(names, scores, pvalues, self.correction);
Ok(result)
}
}