use std::cmp;
use std::collections::HashMap;
use std::hash::BuildHasherDefault;
use crate::sketch_schemes::hashing::NoHashHasher;
use crate::sketch_schemes::KmerCount;
pub fn cardinality(sketch: &[KmerCount]) -> Result<u64, &'static str> {
if sketch.is_empty() {
return Ok(0u64);
}
Ok(
((sketch.len() - 1) as f32 / (sketch.last().unwrap().hash as f32 / usize::MAX as f32))
as u64,
)
}
pub fn hist(sketch: &[KmerCount]) -> Vec<u64> {
let mut max_count = 0;
let mut counts: HashMap<usize, u64, BuildHasherDefault<NoHashHasher>> =
HashMap::with_capacity_and_hasher(150_000, BuildHasherDefault::default());
for kmer in sketch {
max_count = cmp::max(max_count, u64::from(kmer.count));
counts
.entry(kmer.count as usize - 1)
.and_modify(|c| *c += 1)
.or_insert(1);
}
let mut counts_vec: Vec<u64> = Vec::with_capacity(max_count as usize);
for i in 0..max_count {
counts_vec.push(*counts.get(&(i as usize)).unwrap_or(&0))
}
counts_vec
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_hist() {
let sketch = vec![
KmerCount {
hash: 1,
kmer: vec![],
count: 1,
extra_count: 0,
label: None,
},
KmerCount {
hash: 2,
kmer: vec![],
count: 1,
extra_count: 0,
label: None,
},
KmerCount {
hash: 3,
kmer: vec![],
count: 1,
extra_count: 0,
label: None,
},
];
let hist_data = hist(&sketch);
assert_eq!(hist_data.len(), 1);
assert_eq!(hist_data[0], 3);
let sketch = vec![
KmerCount {
hash: 1,
kmer: vec![],
count: 4,
extra_count: 0,
label: None,
},
KmerCount {
hash: 2,
kmer: vec![],
count: 2,
extra_count: 0,
label: None,
},
KmerCount {
hash: 3,
kmer: vec![],
count: 4,
extra_count: 0,
label: None,
},
KmerCount {
hash: 4,
kmer: vec![],
count: 3,
extra_count: 0,
label: None,
},
KmerCount {
hash: 3,
kmer: vec![],
count: 126497,
extra_count: 0,
label: None,
},
];
let hist_data = hist(&sketch);
assert_eq!(hist_data.len(), 126497);
assert_eq!(hist_data[0], 0);
assert_eq!(hist_data[1], 1);
assert_eq!(hist_data[2], 1);
assert_eq!(hist_data[3], 2);
assert_eq!(hist_data[126497 - 1], 1);
}
}