use indicatif::{ProgressBar, ProgressStyle};
use std::collections::HashSet;
use std::hash::Hash;
pub fn vec2set<T: Eq + Hash>(x: Vec<T>) -> HashSet<T> {
x.into_iter().collect::<HashSet<T>>()
}
pub fn seq_to_int(seq: &str) -> u64 {
assert!(seq.len() <= 32); let s: String = seq
.chars()
.map(|x| match x {
'A' => '0',
'C' => '1',
'G' => '2',
'T' => '3',
c => panic!("unkown seq character {}", c),
})
.collect();
u64::from_str_radix(&s, 4).unwrap()
}
pub fn int_to_seq(i: u64, seq_len: usize) -> String {
let mut q = i;
let mut result: Vec<u64> = Vec::with_capacity(seq_len);
while q >= 4 {
let quotient = q / 4;
let remainder = q % 4;
result.push(remainder);
q = quotient;
}
result.push(q);
while result.len() < seq_len {
result.push(0);
}
result.reverse();
let s: String = result
.iter()
.map(|x| match x {
0 => 'A',
1 => 'C',
2 => 'G',
3 => 'T',
c => panic!("unkown seq character {}", c),
})
.collect();
s
}
pub fn get_progressbar(total: u64) -> ProgressBar {
let bar = ProgressBar::new(total);
bar.set_style(
ProgressStyle::default_bar()
.template("[{elapsed_precise} ETA {eta}] {bar:40.cyan/blue} {pos}/{len} {per_sec}")
.unwrap()
.progress_chars("##-"),
);
bar
}
pub fn get_spinner() -> indicatif::ProgressBar{
let bar = indicatif::ProgressBar::new_spinner();
bar.set_style(
indicatif::ProgressStyle::default_bar()
.template("[{elapsed_precise}] {pos} {per_sec}")
.unwrap()
.progress_chars("##-"),
);
bar
}
pub mod argsort {
use std::cmp::Ordering;
#[derive(PartialEq, PartialOrd)]
struct NonNan(f64);
impl NonNan {
fn new(val: f64) -> Option<NonNan> {
if val.is_nan() {
None
} else {
Some(NonNan(val))
}
}
}
impl Eq for NonNan {}
impl Ord for NonNan {
fn cmp(&self, other: &NonNan) -> Ordering {
self.partial_cmp(other).unwrap()
}
}
pub fn argsort<T: Ord>(slice: &[T]) -> Vec<usize> {
let n = slice.len();
let mut keys: Vec<_> = (0..n).collect();
keys.sort_by_key(|x| &slice[*x]);
keys
}
pub fn argsort_float(fvec: &Vec<f64>, ascending: bool) -> Vec<usize> {
let _fvec: Vec<NonNan> = fvec
.iter()
.map(|x| NonNan::new(*x).unwrap_or_else(|| panic!("Nan values in {fvec:?}")))
.collect();
let mut fvec_sorted_ix = argsort(&_fvec);
if ascending {
fvec_sorted_ix.reverse();
}
fvec_sorted_ix
}
pub fn argmax_float(fvec: &Vec<f64>) -> (usize, f64) {
let ix = argsort_float(fvec, true);
let i = ix[0];
let value = fvec[i];
(i, value)
}
pub fn argmin_float(fvec: &Vec<f64>) -> (usize, f64) {
let ix = argsort_float(fvec, false);
let i = ix[0];
let value = fvec[i];
(i, value)
}
#[test]
fn test_argmin() {
let v = vec![1.0, -1.0, 10.0, 0.0];
assert_eq!(argmin_float(&v), (1, -1.0));
}
#[test]
fn test_argmax() {
let v = vec![1.0, -1.0, 10.0, 0.0];
assert_eq!(argmax_float(&v), (2, 10.0));
}
}
pub fn min_argmin<T: Ord+ Copy>(x: &[T]) -> (T, Vec<usize>){
if x.len() == 1 {
(x[0], vec![0])
} else {
let mut current_min: &T = x.first().expect("x has to contain an element");
let mut indices: Vec<usize> = Vec::with_capacity(x.len());
indices.push(0);
for (ix, el) in x.iter().enumerate().skip(1) {
match el.cmp(current_min) {
std::cmp::Ordering::Less => {
current_min = el;
indices = vec![ix]
},
std::cmp::Ordering::Equal => {
indices.push(ix)
},
std::cmp::Ordering::Greater => { },
}
}
(*current_min, indices)
}
}
#[test]
fn test_min_argmin() {
let x = vec![4,3,2,1];
assert_eq!(
min_argmin(&x),
(1, vec![3])
);
let x = vec![4,1,2,1];
assert_eq!(
min_argmin(&x),
(1, vec![1,3])
);
let x = vec![4, 2, 3, 2, 1, 2, 1, 4];
assert_eq!(
min_argmin(&x),
(1, vec![4, 6])
);
}
#[cfg(test)]
mod tests {
#[test]
fn encode_seq() {
use crate::utils::seq_to_int;
assert_eq!(seq_to_int("A"), 0);
assert_eq!(seq_to_int("C"), 1);
assert_eq!(seq_to_int("G"), 2);
assert_eq!(seq_to_int("T"), 3);
assert_eq!(seq_to_int("GCCA"), 148);
}
#[test]
fn decode_seq() {
use crate::utils::int_to_seq;
assert_eq!(int_to_seq(0, 1), "A");
assert_eq!(int_to_seq(1, 1), "C");
assert_eq!(int_to_seq(2, 1), "G");
assert_eq!(int_to_seq(3, 1), "T");
assert_eq!(int_to_seq(0, 3), "AAA");
assert_eq!(int_to_seq(1, 3), "AAC");
assert_eq!(int_to_seq(2, 3), "AAG");
assert_eq!(int_to_seq(3, 3), "AAT");
assert_eq!(int_to_seq(4, 2), "CA");
assert_eq!(int_to_seq(5, 2), "CC");
assert_eq!(int_to_seq(6, 2), "CG");
assert_eq!(int_to_seq(7, 2), "CT");
assert_eq!(int_to_seq(148, 4), "GCCA");
}
}