use core::{fmt, ops::Add};
use std::{collections::HashMap, vec::Vec};
use crate::{
allocator::Allocator,
raw::{InnerNode, LeafNode, NodeType, OpaqueNodePtr},
visitor::{Visitable, Visitor},
AsBytes, TreeMap,
};
#[derive(Debug)]
pub struct TreeStatsCollector {
current: TreeStats,
}
impl TreeStatsCollector {
pub fn collect<K: AsBytes, V, A: Allocator, const PREFIX_LEN: usize>(
tree: &TreeMap<K, V, PREFIX_LEN, A>,
) -> Option<TreeStats> {
tree.state
.as_ref()
.map(|state| unsafe { Self::collect_ptr(&state.root) })
}
pub unsafe fn collect_ptr<K: AsBytes, V, const PREFIX_LEN: usize>(
root: &OpaqueNodePtr<K, V, PREFIX_LEN>,
) -> TreeStats {
let mut collector = TreeStatsCollector {
current: TreeStats::default(),
};
root.visit_with(&mut collector);
collector.current
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct InnerNodeStats {
pub count: usize,
pub total_slots: usize,
pub sum_slots: usize,
pub total_header_bytes: usize,
pub sum_prefix_len_bytes: usize,
pub sum_capped_prefix_len_bytes: usize,
pub max_prefix_len_bytes: usize,
pub mem_usage: usize,
}
impl InnerNodeStats {
fn aggregate_data<K, V, const PREFIX_LEN: usize, N>(&mut self, t: &N)
where
N: InnerNode<PREFIX_LEN, Key = K, Value = V>,
{
self.count += 1;
self.total_slots += *N::TYPE.capacity_range().end();
self.sum_slots += t.header().num_children();
self.total_header_bytes += PREFIX_LEN;
self.sum_prefix_len_bytes += t.header().prefix_len();
self.sum_capped_prefix_len_bytes += t.header().capped_prefix_len();
self.max_prefix_len_bytes = self.max_prefix_len_bytes.max(t.header().prefix_len());
self.mem_usage += core::mem::size_of_val(t);
}
pub fn free_slots(&self) -> usize {
self.total_slots - self.sum_slots
}
pub fn percentage_slots(&self) -> f64 {
self.sum_slots as f64 / self.total_slots as f64
}
pub fn avg_prefix_len(&self) -> f64 {
self.sum_prefix_len_bytes as f64 / self.count as f64
}
pub fn avg_capped_prefix_len(&self) -> f64 {
self.sum_capped_prefix_len_bytes as f64 / self.count as f64
}
pub fn free_header_bytes(&self) -> usize {
self.total_header_bytes - self.sum_capped_prefix_len_bytes
}
pub fn percentage_header_bytes(&self) -> f64 {
self.sum_capped_prefix_len_bytes as f64 / self.total_header_bytes as f64
}
pub fn node_size(&self) -> Option<usize> {
self.mem_usage.checked_div(self.count)
}
}
impl Add for InnerNodeStats {
type Output = Self;
fn add(self, rhs: Self) -> Self::Output {
Self {
count: self.count + rhs.count,
total_slots: self.total_slots + rhs.total_slots,
sum_slots: self.sum_slots + rhs.sum_slots,
total_header_bytes: self.total_header_bytes + rhs.total_header_bytes,
sum_prefix_len_bytes: self.sum_prefix_len_bytes + rhs.sum_prefix_len_bytes,
sum_capped_prefix_len_bytes: self.sum_capped_prefix_len_bytes
+ rhs.sum_capped_prefix_len_bytes,
max_prefix_len_bytes: self.max_prefix_len_bytes.max(rhs.max_prefix_len_bytes),
mem_usage: self.mem_usage + rhs.mem_usage,
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct LeafStats {
pub count: usize,
pub sum_key_bytes: usize,
pub mem_usage: usize,
}
impl Add for LeafStats {
type Output = Self;
fn add(self, rhs: Self) -> Self::Output {
Self {
count: self.count + rhs.count,
sum_key_bytes: self.sum_key_bytes + rhs.sum_key_bytes,
mem_usage: self.mem_usage + rhs.mem_usage,
}
}
}
#[derive(Clone, PartialEq, Eq, Default)]
pub struct FixedOrderNodeStats(HashMap<NodeType, InnerNodeStats>);
impl FixedOrderNodeStats {
pub fn get(&self, node_type: NodeType) -> Option<&InnerNodeStats> {
self.0.get(&node_type)
}
}
impl core::ops::Index<NodeType> for FixedOrderNodeStats {
type Output = InnerNodeStats;
fn index(&self, index: NodeType) -> &Self::Output {
self.get(index).unwrap()
}
}
impl fmt::Debug for FixedOrderNodeStats {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut node_types = self.0.keys().collect::<Vec<_>>();
node_types.sort();
f.debug_map()
.entries(
node_types
.into_iter()
.map(|node_type| (node_type, self.0.get(node_type).unwrap())),
)
.finish()
}
}
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct TreeStats {
pub inner_node: FixedOrderNodeStats,
pub tree: InnerNodeStats,
pub leaf: LeafStats,
}
impl TreeStats {
pub fn total_memory_usage(&self) -> usize {
self.tree.mem_usage + self.leaf.mem_usage
}
pub fn bytes_per_entry(&self) -> f64 {
self.tree.mem_usage as f64 / self.leaf.count as f64
}
pub fn bytes_per_entry_with_leaf(&self) -> f64 {
self.total_memory_usage() as f64 / self.leaf.count as f64
}
}
impl<K, V, const PREFIX_LEN: usize> Visitor<K, V, PREFIX_LEN> for TreeStatsCollector
where
K: AsBytes,
{
type Output = ();
fn default_output(&self) -> Self::Output {}
fn combine_output(&self, _: Self::Output, _: Self::Output) -> Self::Output {}
fn visit_inner_node<N>(&mut self, t: &N) -> Self::Output
where
N: InnerNode<PREFIX_LEN, Key = K, Value = V> + Visitable<K, V, PREFIX_LEN>,
{
t.super_visit_with(self);
self.current
.inner_node
.0
.entry(N::TYPE)
.or_default()
.aggregate_data(t);
self.current.tree.aggregate_data(t);
}
fn visit_leaf(&mut self, t: &LeafNode<K, V, PREFIX_LEN>) -> Self::Output {
self.current.leaf.count += 1;
self.current.leaf.sum_key_bytes += t.key_ref().as_bytes().len();
self.current.leaf.mem_usage += core::mem::size_of_val(t);
}
}
impl fmt::Display for TreeStats {
#[rustfmt::skip]
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let TreeStats {
inner_node,
tree,
..
} = self;
write!(f, "{self:#?}")?;
f.write_str("\n")?;
f.write_fmt(format_args!("memory usage (inner nodes): {} bytes\n", tree.mem_usage))?;
f.write_fmt(format_args!("memory usage (inner nodes + leaf): {} bytes\n", self.total_memory_usage()))?;
f.write_fmt(format_args!("bytes/entry: {:.5}\n", self.bytes_per_entry()))?;
f.write_fmt(format_args!("bytes/entry (with leaf): {:.5}\n", self.bytes_per_entry_with_leaf()))?;
f.write_fmt(format_args!("avg prefix length: {:.5} bytes\n", tree.avg_prefix_len()))?;
f.write_fmt(format_args!("avg capped prefix length: {:.5} bytes\n", tree.avg_capped_prefix_len()))?;
f.write_fmt(format_args!("% used header bytes (0-1): {:.5}\n", tree.percentage_header_bytes()))?;
f.write_fmt(format_args!("% used slots (0-1): {:.5}\n", tree.percentage_slots()))?;
let mut node_types = inner_node.0.keys().collect::<Vec<_>>();
node_types.sort();
for node_type in node_types {
let stats = inner_node.0.get(node_type).unwrap();
let label = format!("{node_type:?} size:");
f.write_fmt(format_args!("{label:<34} {:?} bytes\n", stats.node_size()))?;
}
f.write_fmt(format_args!("max prefix length: {} bytes", tree.max_prefix_len_bytes))?;
Ok(())
}
}
#[cfg(test)]
mod tests {
use alloc::vec::Vec;
#[cfg(not(miri))]
use expect_test::expect_file;
use super::*;
use crate::{testing::generate_key_fixed_length, TreeMap};
#[test]
#[cfg(not(miri))]
fn mostly_empty_tree_stats_fixed_length_tree() {
let mut tree = TreeMap::new();
for (k, v) in generate_key_fixed_length([1, 1, 1, 1])
.enumerate()
.map(|(a, b)| (b, a))
{
tree.try_insert(k, v).unwrap();
}
let stats = TreeStatsCollector::collect(&tree).unwrap();
expect_file!["./tree_stats/mostly_empty_tree_stats_fixed_length_tree.expect"]
.assert_debug_eq(&stats);
}
#[test]
#[cfg(not(miri))]
fn full_tree_stats_fixed_length_tree() {
let mut tree = TreeMap::new();
for (k, v) in generate_key_fixed_length([15, 3])
.enumerate()
.map(|(a, b)| (b, a))
{
tree.try_insert(k, v).unwrap();
}
let stats = TreeStatsCollector::collect(&tree).unwrap();
expect_file!["./tree_stats/full_tree_stats_fixed_length_tree.expect"]
.assert_debug_eq(&stats);
}
#[test]
fn test_inner_node_stats_calculations() {
let stats1 = InnerNodeStats {
count: 0,
mem_usage: 100,
sum_capped_prefix_len_bytes: 50,
total_header_bytes: 100,
..Default::default()
};
assert_eq!(stats1.node_size(), None);
assert_eq!(stats1.percentage_header_bytes(), 0.5);
let stats2 = InnerNodeStats {
count: 10,
mem_usage: 100,
sum_capped_prefix_len_bytes: 50,
total_header_bytes: 100,
..Default::default()
};
assert_eq!(stats2.node_size(), Some(10));
assert_eq!(stats2.percentage_header_bytes(), 0.5);
let stats3 = InnerNodeStats {
total_header_bytes: 0,
sum_capped_prefix_len_bytes: 0,
..Default::default()
};
assert!(stats3.percentage_header_bytes().is_nan());
let stats4 = InnerNodeStats {
total_header_bytes: 0,
sum_capped_prefix_len_bytes: 50,
..Default::default()
};
assert!(stats4.percentage_header_bytes().is_infinite());
assert!(stats4.percentage_header_bytes().is_sign_positive());
let stats5 = InnerNodeStats {
count: 10,
mem_usage: 0,
..Default::default()
};
assert_eq!(stats5.node_size(), Some(0));
let stats6 = InnerNodeStats {
count: 100,
mem_usage: 100,
..Default::default()
};
assert_eq!(stats6.node_size(), Some(1));
let stats7 = InnerNodeStats {
count: 10,
total_slots: 100,
sum_slots: 20,
sum_prefix_len_bytes: 30,
sum_capped_prefix_len_bytes: 25,
total_header_bytes: 50,
..Default::default()
};
assert_eq!(stats7.free_slots(), 80);
assert_eq!(stats7.percentage_slots(), 0.2);
assert_eq!(stats7.avg_prefix_len(), 3.0);
assert_eq!(stats7.avg_capped_prefix_len(), 2.5);
assert_eq!(stats7.free_header_bytes(), 25);
let stats_div_zero = InnerNodeStats::default();
assert!(stats_div_zero.percentage_slots().is_nan());
assert!(stats_div_zero.avg_prefix_len().is_nan());
assert!(stats_div_zero.avg_capped_prefix_len().is_nan());
}
#[test]
fn test_leaf_stats_add() {
let stats1 = LeafStats {
count: 10,
sum_key_bytes: 100,
mem_usage: 1000,
};
let stats2 = LeafStats {
count: 5,
sum_key_bytes: 50,
mem_usage: 500,
};
let expected_sum = LeafStats {
count: 15,
sum_key_bytes: 150,
mem_usage: 1500,
};
assert_eq!(stats1 + stats2, expected_sum);
let default = LeafStats::default();
let sum_with_default = stats1 + default;
assert_eq!(sum_with_default, stats1);
}
#[test]
fn test_inner_node_stats_add() {
let stats1 = InnerNodeStats {
count: 1,
total_slots: 2,
sum_slots: 3,
total_header_bytes: 4,
sum_prefix_len_bytes: 5,
sum_capped_prefix_len_bytes: 6,
max_prefix_len_bytes: 7,
mem_usage: 8,
};
let stats2 = InnerNodeStats {
count: 10,
total_slots: 20,
sum_slots: 30,
total_header_bytes: 40,
sum_prefix_len_bytes: 50,
sum_capped_prefix_len_bytes: 60,
max_prefix_len_bytes: 70,
mem_usage: 80,
};
let sum = stats1 + stats2;
assert_eq!(sum.count, 11);
assert_eq!(sum.total_slots, 22);
assert_eq!(sum.sum_slots, 33);
assert_eq!(sum.total_header_bytes, 44);
assert_eq!(sum.sum_prefix_len_bytes, 55);
assert_eq!(sum.sum_capped_prefix_len_bytes, 66);
assert_eq!(sum.max_prefix_len_bytes, 70);
assert_eq!(sum.mem_usage, 88);
}
#[test]
fn test_tree_stats_calculations() {
let tree_stats = TreeStats {
tree: InnerNodeStats {
mem_usage: 1000,
..Default::default()
},
leaf: LeafStats {
count: 50,
mem_usage: 500,
..Default::default()
},
..Default::default()
};
assert_eq!(tree_stats.total_memory_usage(), 1500);
assert_eq!(tree_stats.bytes_per_entry(), 20.0);
assert_eq!(tree_stats.bytes_per_entry_with_leaf(), 30.0);
let empty_tree_stats = TreeStats::default();
assert!(empty_tree_stats.bytes_per_entry().is_nan());
assert!(empty_tree_stats.bytes_per_entry_with_leaf().is_nan());
let no_leaf_stats = TreeStats {
tree: InnerNodeStats {
mem_usage: 1000,
..Default::default()
},
..Default::default()
};
assert!(no_leaf_stats.bytes_per_entry().is_infinite());
assert!(no_leaf_stats.bytes_per_entry_with_leaf().is_infinite());
}
#[test]
fn tree_with_node48_and_node256() {
use NodeType::*;
let mut tree: TreeMap<Vec<u8>, u8> = TreeMap::new();
for i in 0u8..48 {
tree.try_insert(vec![i], i).unwrap();
}
let stats = TreeStatsCollector::collect(&tree).unwrap();
assert!(stats.inner_node.get(Node4).is_none());
assert!(stats.inner_node.get(Node16).is_none());
assert_eq!(stats.inner_node[Node48].count, 1);
assert!(stats.inner_node.get(Node256).is_none());
for i in 32u8..255 {
tree.try_insert(vec![i], i).unwrap();
}
let stats = TreeStatsCollector::collect(&tree).unwrap();
assert!(stats.inner_node.get(Node4).is_none());
assert!(stats.inner_node.get(Node16).is_none());
assert!(stats.inner_node.get(Node48).is_none());
assert_eq!(stats.inner_node[Node256].count, 1);
}
}