Skip to main content

priority_lfu/
deepsize.rs

1#![forbid(missing_docs)]
2
3//! A utility for recursively measuring the size of an object
4//!
5//! This contains the [`DeepSizeOf`](DeepSizeOf) trait, and re-exports
6//! the `DeepSizeOf` derive macro from [`deepsize_derive`](https://docs.rs/deepsize_derive)
7//!
8//! ```rust
9//! use priority_lfu::DeepSizeOf;
10//!
11//! #[derive(DeepSizeOf)]
12//! struct Test {
13//!     a: u32,
14//!     b: Box<u8>,
15//! }
16//!
17//! let object = Test {
18//!     a: 15,
19//!     b: Box::new(255),
20//! };
21//!
22//! // The stack size of the struct:
23//! //    The size of a u32 (4)
24//! //    4 bytes padding (64 bit only)
25//! //    The stack size of the Box (a usize pointer, 32 or 64 bits: 4 or 8 bytes)
26//! // + the size of a u8 (1), the Box's heap storage
27//! #[cfg(target_pointer_width = "64")]
28//! assert_eq!(object.deep_size_of(), 17);
29//! #[cfg(target_pointer_width = "32")]
30//! assert_eq!(object.deep_size_of(), 9);
31//! ```
32
33extern crate alloc;
34extern crate core;
35
36extern crate self as priority_lfu_derive;
37// pub use priority_lfu_derive::*;
38
39use core::mem::{size_of, size_of_val};
40
41#[cfg(test)]
42mod test;
43
44mod default_impls;
45mod external_impls;
46
47/// A trait for measuring the size of an object and its children
48///
49/// In many cases this is just `std::mem::size_of::<T>()`, but if
50/// the struct contains a `Vec`, `String`, `Box`, or other allocated object or
51/// reference, then it is the size of the struct, plus the size of the contents
52/// of the object.
53pub trait DeepSizeOf {
54	/// Returns an estimation of a total size of memory owned by the
55	/// object, including heap-managed storage.
56	///
57	/// This is an estimation and not a precise result because it
58	/// doesn't account for allocator's overhead.
59	///
60	/// ```rust
61	/// use priority_lfu::DeepSizeOf;
62	///
63	/// let mut map: Vec<(Box<u32>, String)> = Vec::new();
64	///
65	/// map.push((Box::new(42u32), String::from("Hello World")));
66	/// map.push((Box::new(20u32), String::from("Something")));
67	/// map.push((Box::new(0u32),  String::from("A string")));
68	/// map.push((Box::new(255u32), String::from("Dynamically Allocated!")));
69	///
70	/// assert_eq!(map.deep_size_of(),
71	///     std::mem::size_of::<Vec<(Box<u32>, String)>>() +
72	///     4 * std::mem::size_of::<(Box<u32>, String)>() +
73	///     4 * std::mem::size_of::<u32>() +
74	///     11 + 9 + 8 + 22
75	/// );
76	/// ```
77	fn deep_size_of(&self) -> usize {
78		size_of_val(self) + self.deep_size_of_children(&mut Context::new())
79	}
80
81	/// Returns an estimation of the heap-managed storage of this object.
82	/// This does not include the size of the object itself.
83	///
84	/// This is an estimation and not a precise result, because it
85	/// doesn't account for allocator's overhead.
86	///
87	/// This is an internal function (this shouldn't be called directly),
88	/// and requires a [`Context`](Context) to track visited references.
89	/// Implementations of this function should only call `deep_size_of_children`,
90	/// and not `deep_size_of` so that they reference tracking is not reset.
91	///
92	/// In all other cases, `deep_size_of` should be called instead of this function.
93	///
94	/// If a struct and all of its children do not allocate or have references,
95	/// this method should return `0`, as it cannot have any heap allocated
96	/// children.  There is a shortcut macro for this implementation,
97	/// [`known_size_of`](known_size_of), used like `known_deep_size!(0, (), u32, u64);` which
98	/// generates the impls.
99	///
100	/// The most common way to use this method, and how the derive works,
101	/// is to call this method on each of the structs members and sum the
102	/// results, which works as long as all members of the struct implement
103	/// `DeepSizeOf`.
104	///
105	/// To implement this for a collection type, you should sum the deep sizes of
106	/// the items of the collection and then add the size of the allocation of the
107	/// collection itself.  This can become much more complicated if the collection
108	/// has multiple separate allocations.
109	///
110	/// Here is an example from the implementation of `DeepSizeOf` for `Vec<T>`
111	/// ```rust, ignore
112	/// # use priority_lfu_derive::{DeepSizeOf, Context};
113	/// impl<T> DeepSizeOf for std::vec::Vec<T> where T: DeepSizeOf {
114	///     fn deep_size_of_children(&self, context: &mut Context) -> usize {
115	///         // Size of heap allocations for each child
116	///         self.iter().map(|child| child.deep_size_of_children(context)).sum()
117	///          + self.capacity() * std::mem::size_of::<T>()  // Size of Vec's heap allocation
118	///     }
119	/// }
120	/// ```
121	fn deep_size_of_children(&self, context: &mut Context) -> usize;
122}
123
124use std::collections::HashSet as GenericSet;
125
126/// The context of which references have already been seen.
127/// This should only be used in the implementation of the
128/// `deep_size_of_children` function, and (eventually, when this
129/// reaches 0.2) will not be able to be constructed, and only obtained
130/// from inside the function.
131///
132/// Keeps track of the [`Arc`](std::sync::Arc)s, [`Rc`](std::rc::Rc)s, and references
133/// that have been visited, so that [`Arc`](std::sync::Arc)s and other references
134/// aren't double counted.
135///
136/// Currently this counts each reference once, although there are arguments for
137/// only counting owned data and ignoring partial ownership, or for counting
138/// partial references such as Arc as its size divided by the strong reference count.
139///
140/// [Github issue discussion here](https://github.com/dtolnay/request-for-implementation/issues/22)
141///
142/// This must be passed between `deep_size_of_children` calls when
143/// recursing, so that references are not counted multiple timse.
144#[derive(Debug)]
145pub struct Context {
146	/// A set of all [`Arc`](std::sync::Arc)s that have already been counted
147	arcs: GenericSet<usize>,
148	/// A set of all [`Rc`](std::sync::Arc)s that have already been counted
149	rcs: GenericSet<usize>,
150}
151
152impl Context {
153	/// Creates a new empty context for use in the `deep_size` functions
154	fn new() -> Self {
155		Self {
156			arcs: GenericSet::new(),
157			rcs: GenericSet::new(),
158		}
159	}
160
161	/// Adds an [`Arc`](std::sync::Arc) to the list of visited [`Arc`](std::sync::Arc)s
162	fn add_arc<T: ?Sized>(&mut self, arc: &alloc::sync::Arc<T>) {
163		self.arcs.insert(&**arc as *const T as *const u8 as usize);
164	}
165	/// Checks if an [`Arc`](std::sync::Arc) is in the list visited [`Arc`](std::sync::Arc)s
166	fn contains_arc<T: ?Sized>(&self, arc: &alloc::sync::Arc<T>) -> bool {
167		self.arcs.contains(&(&**arc as *const T as *const u8 as usize))
168	}
169
170	/// Adds an [`Rc`](std::rc::Rc) to the list of visited [`Rc`](std::rc::Rc)s
171	fn add_rc<T: ?Sized>(&mut self, rc: &alloc::rc::Rc<T>) {
172		self.rcs.insert(&**rc as *const T as *const u8 as usize);
173	}
174	/// Checks if an [`Rc`](std::rc::Rc) is in the list visited [`Rc`](std::rc::Rc)s
175	/// Adds an [`Rc`](std::rc::Rc) to the list of visited [`Rc`](std::rc::Rc)s
176	fn contains_rc<T: ?Sized>(&self, rc: &alloc::rc::Rc<T>) -> bool {
177		self.rcs.contains(&(&**rc as *const T as *const u8 as usize))
178	}
179}
180
181impl<T> DeepSizeOf for alloc::vec::Vec<T>
182where
183	T: DeepSizeOf,
184{
185	/// Sums the size of each child object, and then adds the size of
186	/// the unused capacity.
187	///
188	/// ```rust
189	/// use priority_lfu::DeepSizeOf;
190	///
191	/// let mut vec: Vec<u8> = vec![];
192	/// for i in 0..13 {
193	///     vec.push(i);
194	/// }
195	///
196	/// // The capacity (16) plus three usizes (len, cap, pointer)
197	/// assert_eq!(vec.deep_size_of(), 16 + 24);
198	/// ```
199	/// With allocated objects:
200	/// ```rust
201	/// use priority_lfu::DeepSizeOf;
202	///
203	/// let mut vec: Vec<Box<u64>> = vec![];
204	/// for i in 0..13 {
205	///     vec.push(Box::new(i));
206	/// }
207	///
208	/// // The capacity (16?) * size (8) plus three usizes (len, cap, pointer)
209	/// // and length (13) * the allocated size of each object
210	/// assert_eq!(vec.deep_size_of(), 24 + vec.capacity() * 8 + 13 * 8);
211	/// ```
212	fn deep_size_of_children(&self, context: &mut Context) -> usize {
213		self.iter().map(|child| child.deep_size_of_children(context)).sum::<usize>()
214			+ self.capacity() * size_of::<T>()
215		// Size of unused capacity
216	}
217}
218
219impl<T> DeepSizeOf for std::collections::VecDeque<T>
220where
221	T: DeepSizeOf,
222{
223	/// Sums the size of each child object, and then adds the size of
224	/// the unused capacity.
225	///
226	/// ```rust
227	/// use priority_lfu::DeepSizeOf;
228	/// use std::collections::VecDeque;
229	///
230	/// let mut vec: VecDeque<u8> = VecDeque::new();
231	/// for i in 0..12 {
232	///     vec.push_back(i);
233	/// }
234	/// vec.push_front(13);
235	///
236	/// // The capacity (15?) plus four usizes (start, end, cap, pointer)
237	/// assert_eq!(vec.deep_size_of(), vec.capacity() * 1 + 32);
238	/// ```
239	/// With allocated objects:
240	/// ```rust
241	/// use priority_lfu::DeepSizeOf;
242	/// use std::collections::VecDeque;
243	///
244	/// let mut vec: VecDeque<Box<u64>> = VecDeque::new();
245	/// for i in 0..12 {
246	///     vec.push_back(Box::new(i));
247	/// }
248	/// vec.push_front(Box::new(13));
249	///
250	/// // The capacity (15?) * size (8) plus four usizes (start, end, cap, pointer)
251	/// // and length (13) * the allocated size of each object
252	/// assert_eq!(vec.deep_size_of(), 32 + vec.capacity() * 8 + 13 * 8);
253	/// ```
254	fn deep_size_of_children(&self, context: &mut Context) -> usize {
255		// Deep size of children
256		self.iter().map(|child| child.deep_size_of_children(context)).sum::<usize>()
257			+ self.capacity() * size_of::<T>() // Size of Vec's heap allocation
258	}
259}
260
261impl<T> DeepSizeOf for alloc::collections::LinkedList<T>
262where
263	T: DeepSizeOf,
264{
265	/// Sums the size of each child object, assuming the overhead of
266	/// each node is 2 usize (next, prev)
267	///
268	/// ```rust
269	/// use priority_lfu::DeepSizeOf;
270	/// use std::collections::LinkedList;
271	///
272	/// let mut list: LinkedList<u8> = LinkedList::new();
273	/// for i in 0..12 {
274	///     list.push_back(i);
275	/// }
276	/// list.push_front(13);
277	///
278	/// assert_eq!(list.deep_size_of(), std::mem::size_of::<LinkedList<u8>>()
279	///                                + 13 * 1 + 13 * 2 * 8);
280	/// ```
281	fn deep_size_of_children(&self, context: &mut Context) -> usize {
282		self.iter().fold(0, |sum, child| {
283			sum + size_of_val(child) + child.deep_size_of_children(context) + size_of::<usize>() * 2
284			// overhead of each node
285		})
286	}
287}
288
289impl<K, V, S> DeepSizeOf for std::collections::HashMap<K, V, S>
290where
291	K: DeepSizeOf + Eq + std::hash::Hash,
292	V: DeepSizeOf,
293	S: std::hash::BuildHasher,
294{
295	// How much more overhead is there to a hashmap? The docs say it is
296	// essentially just a Vec<Option<(u64, K, V)>>;
297	// For the old implementation of HashMap:
298	// fn deep_size_of_children(&self, context: &mut Context) -> usize {
299	//     self.iter().fold(0, |sum, (key, val)| {
300	//         sum + key.deep_size_of_children(context) + val.deep_size_of_children(context)
301	//     }) + self.capacity() * size_of::<Option<(u64, K, V)>>()
302	// }
303	// For the hashbrown implementation of HashMap:
304	fn deep_size_of_children(&self, context: &mut Context) -> usize {
305		self.iter().fold(0, |sum, (key, val)| {
306			sum + key.deep_size_of_children(context) + val.deep_size_of_children(context)
307		}) + self.capacity() * size_of::<(K, V)>()
308		// Buckets would be the more correct value, but there isn't
309		// an API for accessing that with hashbrown.
310		// I believe that hashbrown's HashTable is represented as
311		// an array of (K, V), with control bytes at the start/end
312		// that mark used/uninitialized buckets (?)
313	}
314}
315
316impl<K, S> DeepSizeOf for std::collections::HashSet<K, S>
317where
318	K: DeepSizeOf + Eq + std::hash::Hash,
319	S: std::hash::BuildHasher,
320{
321	fn deep_size_of_children(&self, context: &mut Context) -> usize {
322		// self.iter()
323		//     .fold(0, |sum, item| sum + item.deep_size_of_children(context))
324		//     + self.capacity() * size_of::<Option<(u64, K, ())>>()
325		self.iter().fold(0, |sum, key| sum + key.deep_size_of_children(context))
326			+ self.capacity() * size_of::<K>()
327	}
328}
329
330// A btree node has 2*B - 1 (K,V) pairs and (usize, u16, u16)
331// overhead, and an internal btree node additionally has 2*B
332// `usize` overhead.
333// A node can contain between B - 1 and 2*B - 1 elements, so
334// we assume it has the midpoint 3/2*B - 1.
335
336// Constants from rust's source:
337// https://doc.rust-lang.org/src/alloc/collections/btree/node.rs.html#43-45
338
339const BTREE_B: usize = 6;
340const BTREE_MIN: usize = 2 * BTREE_B - 1;
341const BTREE_MAX: usize = BTREE_B - 1;
342
343impl<K: Ord + DeepSizeOf, V: DeepSizeOf> DeepSizeOf for std::collections::BTreeMap<K, V> {
344	fn deep_size_of_children(&self, context: &mut Context) -> usize {
345		let element_size = self.iter().fold(0, |sum, (k, v)| {
346			sum + k.deep_size_of_children(context) + v.deep_size_of_children(context)
347		});
348		let overhead = size_of::<(usize, u16, u16, [(K, V); BTREE_MAX], [usize; BTREE_B])>();
349		element_size + self.len() * overhead * 2 / (BTREE_MAX + BTREE_MIN)
350	}
351}
352
353impl<K: Ord + DeepSizeOf> DeepSizeOf for std::collections::BTreeSet<K> {
354	fn deep_size_of_children(&self, context: &mut Context) -> usize {
355		let element_size =
356			self.iter().fold(0, |sum, item| sum + item.deep_size_of_children(context));
357		let overhead = size_of::<(usize, u16, u16, [K; BTREE_MAX], [usize; BTREE_B])>();
358		element_size + self.len() * overhead * 2 / (BTREE_MAX + BTREE_MIN)
359	}
360}
361
362impl<T> DeepSizeOf for alloc::boxed::Box<T>
363where
364	T: DeepSizeOf + ?Sized,
365{
366	fn deep_size_of_children(&self, context: &mut Context) -> usize {
367		let val: &T = self;
368		size_of_val(val) + val.deep_size_of_children(context)
369	}
370}
371
372impl<T> DeepSizeOf for alloc::sync::Arc<T>
373where
374	T: DeepSizeOf + ?Sized,
375{
376	fn deep_size_of_children(&self, context: &mut Context) -> usize {
377		if context.contains_arc(self) {
378			0
379		} else {
380			context.add_arc(self);
381			let val: &T = self;
382			// Size of the Arc, size of the value, size of the allocations of the value
383			size_of_val(val) + val.deep_size_of_children(context)
384		}
385	}
386}
387
388impl<T> DeepSizeOf for alloc::rc::Rc<T>
389where
390	T: DeepSizeOf + ?Sized,
391{
392	fn deep_size_of_children(&self, context: &mut Context) -> usize {
393		if context.contains_rc(self) {
394			0
395		} else {
396			context.add_rc(self);
397			let val: &T = self;
398			size_of_val(val) + val.deep_size_of_children(context)
399		}
400	}
401}
402
403// References aren't owned; should they count?
404impl<T> DeepSizeOf for &T
405where
406	T: DeepSizeOf + ?Sized,
407{
408	fn deep_size_of_children(&self, _context: &mut Context) -> usize {
409		0
410		// if context.contains_ref(&self) {
411		//     0
412		// } else {
413		//     context.add_ref(&self);
414		//     size_of_val(*self) + (*self).deep_size_of_children(context)
415		// }
416	}
417}
418
419impl<T> DeepSizeOf for &mut T
420where
421	T: DeepSizeOf + ?Sized,
422{
423	fn deep_size_of_children(&self, _context: &mut Context) -> usize {
424		0
425	}
426}
427
428impl<T> DeepSizeOf for [T]
429where
430	T: DeepSizeOf,
431{
432	fn deep_size_of_children(&self, context: &mut Context) -> usize {
433		self.iter().map(|child| child.deep_size_of_children(context)).sum()
434	}
435}