use std::collections::HashMap;
use datasize::DataSize;
use either::Either;
use once_cell::sync::OnceCell;
use rand::{
rngs::StdRng,
seq::{IteratorRandom, SliceRandom},
SeedableRng,
};
const SAMPLE_SIZE: usize = 50;
fn sampling_rng(len: usize) -> StdRng {
let mut seed = <StdRng as SeedableRng>::Seed::default();
let len_be = len.to_be_bytes();
for (b1, b2) in seed.iter_mut().zip(len_be.iter()) {
*b1 ^= *b2;
}
StdRng::from_seed(seed)
}
fn scale_sample(len: usize, sampled: usize) -> usize {
sampled * len / SAMPLE_SIZE
}
#[allow(clippy::ptr_arg)]
pub fn vec_sample<T>(vec: &Vec<T>) -> usize
where
T: DataSize,
{
if vec.len() < SAMPLE_SIZE {
vec.estimate_heap_size()
} else {
let base_size = vec.capacity() * size_of::<T>();
let mut rng = sampling_rng(vec.len());
let sampled = vec
.as_slice()
.choose_multiple(&mut rng, SAMPLE_SIZE)
.map(DataSize::estimate_heap_size)
.sum();
base_size + scale_sample(vec.len(), sampled)
}
}
pub fn hashmap_sample<K, V>(map: &HashMap<K, V>) -> usize
where
K: DataSize,
V: DataSize,
{
fn estimate_hashbrown_rawtable<T>(capacity: usize) -> usize {
let buckets = if capacity < 8 {
if capacity < 4 {
4
} else {
8
}
} else {
(capacity * 8 / 7).next_power_of_two()
};
let size = size_of::<T>();
let ctrl_offset = size * buckets;
ctrl_offset + buckets
}
if map.len() < SAMPLE_SIZE {
map.estimate_heap_size()
} else {
let base_size = estimate_hashbrown_rawtable::<(K, V)>(map.capacity());
let mut rng = sampling_rng(map.len());
let sampled = map
.iter()
.choose_multiple(&mut rng, SAMPLE_SIZE)
.into_iter()
.map(|(k, v)| k.estimate_heap_size() + v.estimate_heap_size())
.sum();
base_size + scale_sample(map.len(), sampled)
}
}
pub(crate) fn once_cell<T>(cell: &OnceCell<T>) -> usize
where
T: DataSize,
{
cell.get().map_or(0, |value| value.estimate_heap_size())
}
pub(crate) fn maybe_either<T, U>(either: &Option<Either<T, U>>) -> usize
where
T: DataSize,
U: DataSize,
{
match either {
None => 0,
Some(Either::Left(left)) => left.estimate_heap_size(),
Some(Either::Right(right)) => right.estimate_heap_size(),
}
}
#[cfg(test)]
#[allow(clippy::assertions_on_constants)] mod tests {
use std::collections::HashMap;
use datasize::DataSize;
use super::{hashmap_sample, vec_sample, SAMPLE_SIZE};
#[test]
fn vec_sample_below_sample_size() {
let data: Vec<Box<u32>> = vec![];
assert_eq!(vec_sample(&data), data.estimate_heap_size());
assert!(SAMPLE_SIZE > 3);
let data2: Vec<Box<u32>> = vec![Box::new(1), Box::new(2), Box::new(3)];
assert_eq!(vec_sample(&data2), data2.estimate_heap_size());
}
#[test]
fn vec_sample_above_sample_size() {
let num_items = SAMPLE_SIZE * 5;
let data: Vec<Vec<u32>> = (0..num_items)
.map(|_| vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
.collect();
assert_eq!(vec_sample(&data), data.estimate_heap_size());
}
#[test]
fn hashmap_sample_below_sample_size() {
let data: HashMap<u32, Box<u32>> = HashMap::new();
assert_eq!(hashmap_sample(&data), data.estimate_heap_size());
assert!(SAMPLE_SIZE > 3);
let mut data2: HashMap<u32, Box<u32>> = HashMap::new();
data2.insert(1, Box::new(1));
data2.insert(2, Box::new(2));
data2.insert(3, Box::new(3));
assert_eq!(hashmap_sample(&data2), data2.estimate_heap_size());
}
#[test]
fn hashmap_sample_above_sample_size() {
let num_items = SAMPLE_SIZE * 5;
let data: HashMap<usize, Vec<u32>> = (0..num_items)
.map(|idx| (idx, vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]))
.collect();
assert_eq!(hashmap_sample(&data), data.estimate_heap_size());
}
}