1pub use lance_derive::DeepSizeOf;
5
6use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
7use std::mem::{size_of, size_of_val};
8use std::sync::atomic::{AtomicU64, AtomicUsize};
9use std::sync::{Arc, Mutex, RwLock};
10
11use arrow_array::{Array, RecordBatch};
12use arrow_buffer::ArrowNativeType;
13use arrow_data::ArrayData;
14
15pub struct Context {
16 seen: HashSet<usize>,
17}
18
19impl Default for Context {
20 fn default() -> Self {
21 Self::new()
22 }
23}
24
25impl Context {
26 pub fn new() -> Self {
27 Self {
28 seen: HashSet::new(),
29 }
30 }
31
32 pub fn mark_seen(&mut self, ptr: usize) -> bool {
34 self.seen.insert(ptr)
35 }
36}
37
38pub trait DeepSizeOf {
39 fn deep_size_of(&self) -> usize {
40 size_of_val(self) + self.deep_size_of_children(&mut Context::new())
41 }
42
43 fn deep_size_of_children(&self, context: &mut Context) -> usize;
44}
45
46macro_rules! impl_deep_size_primitive {
48 ($($t:ty),*) => {
49 $(
50 impl DeepSizeOf for $t {
51 fn deep_size_of_children(&self, _context: &mut Context) -> usize {
52 0
53 }
54 }
55 )*
56 };
57}
58
59impl_deep_size_primitive!(
60 u8,
61 u16,
62 u32,
63 u64,
64 u128,
65 usize,
66 i8,
67 i16,
68 i32,
69 i64,
70 i128,
71 isize,
72 f32,
73 f64,
74 bool,
75 ()
76);
77
78impl DeepSizeOf for str {
79 fn deep_size_of_children(&self, _context: &mut Context) -> usize {
80 0
81 }
82}
83
84impl DeepSizeOf for String {
85 fn deep_size_of_children(&self, _context: &mut Context) -> usize {
86 self.capacity()
87 }
88}
89
90impl DeepSizeOf for AtomicU64 {
91 fn deep_size_of_children(&self, _context: &mut Context) -> usize {
92 0
93 }
94}
95
96impl DeepSizeOf for AtomicUsize {
97 fn deep_size_of_children(&self, _context: &mut Context) -> usize {
98 0
99 }
100}
101
102impl<T: DeepSizeOf, const N: usize> DeepSizeOf for [T; N] {
103 fn deep_size_of_children(&self, context: &mut Context) -> usize {
104 self.iter()
105 .map(|item| item.deep_size_of_children(context))
106 .sum()
107 }
108}
109
110impl<T: DeepSizeOf> DeepSizeOf for [T] {
111 fn deep_size_of_children(&self, context: &mut Context) -> usize {
112 self.iter()
116 .map(|item| item.deep_size_of_children(context))
117 .sum()
118 }
119}
120
121impl<T: DeepSizeOf> DeepSizeOf for RwLock<T> {
122 fn deep_size_of_children(&self, context: &mut Context) -> usize {
123 self.read()
124 .map(|val| val.deep_size_of_children(context))
125 .unwrap_or(0)
126 }
127}
128
129impl<T: DeepSizeOf> DeepSizeOf for Mutex<T> {
130 fn deep_size_of_children(&self, context: &mut Context) -> usize {
131 self.lock()
132 .map(|val| val.deep_size_of_children(context))
133 .unwrap_or(0)
134 }
135}
136
137macro_rules! impl_deep_size_tuple {
139 ($($name:ident),+) => {
140 impl<$($name: DeepSizeOf),+> DeepSizeOf for ($($name,)+) {
141 #[allow(non_snake_case)]
142 fn deep_size_of_children(&self, context: &mut Context) -> usize {
143 let ($($name,)+) = self;
144 0 $(+ $name.deep_size_of_children(context))+
145 }
146 }
147 };
148}
149
150impl_deep_size_tuple!(A, B);
151impl_deep_size_tuple!(A, B, C);
152impl_deep_size_tuple!(A, B, C, D);
153impl_deep_size_tuple!(A, B, C, D, E);
154impl_deep_size_tuple!(A, B, C, D, E, F);
155
156impl<T: DeepSizeOf> DeepSizeOf for Vec<T> {
157 fn deep_size_of_children(&self, context: &mut Context) -> usize {
158 self.capacity() * size_of::<T>()
159 + self
160 .iter()
161 .map(|item| item.deep_size_of_children(context))
162 .sum::<usize>()
163 }
164}
165
166impl<T: DeepSizeOf + ?Sized> DeepSizeOf for Box<T> {
167 fn deep_size_of_children(&self, context: &mut Context) -> usize {
168 size_of_val(&**self) + (**self).deep_size_of_children(context)
169 }
170}
171
172impl<T: DeepSizeOf + ?Sized> DeepSizeOf for Arc<T> {
173 fn deep_size_of_children(&self, context: &mut Context) -> usize {
174 if context.mark_seen(Self::as_ptr(self) as *const () as usize) {
175 size_of_val(&**self) + (**self).deep_size_of_children(context)
176 } else {
177 0
178 }
179 }
180}
181
182impl<T: DeepSizeOf> DeepSizeOf for Option<T> {
183 fn deep_size_of_children(&self, context: &mut Context) -> usize {
184 match self {
185 Some(val) => val.deep_size_of_children(context),
186 None => 0,
187 }
188 }
189}
190
191impl<K: DeepSizeOf, V: DeepSizeOf> DeepSizeOf for HashMap<K, V> {
192 fn deep_size_of_children(&self, context: &mut Context) -> usize {
193 let capacity_bytes = self.capacity() * (size_of::<K>() + size_of::<V>() + 1);
196 let children: usize = self
197 .iter()
198 .map(|(k, v)| k.deep_size_of_children(context) + v.deep_size_of_children(context))
199 .sum();
200 capacity_bytes + children
201 }
202}
203
204impl<K: DeepSizeOf> DeepSizeOf for HashSet<K> {
205 fn deep_size_of_children(&self, context: &mut Context) -> usize {
206 let capacity_bytes = self.capacity() * (size_of::<K>() + 1);
207 let children: usize = self.iter().map(|k| k.deep_size_of_children(context)).sum();
208 capacity_bytes + children
209 }
210}
211
212impl<K: DeepSizeOf, V: DeepSizeOf> DeepSizeOf for BTreeMap<K, V> {
213 fn deep_size_of_children(&self, context: &mut Context) -> usize {
214 let per_entry = size_of::<K>() + size_of::<V>() + 3 * size_of::<usize>();
216 let overhead = self.len() * per_entry;
217 let children: usize = self
218 .iter()
219 .map(|(k, v)| k.deep_size_of_children(context) + v.deep_size_of_children(context))
220 .sum();
221 overhead + children
222 }
223}
224
225impl<K: DeepSizeOf> DeepSizeOf for BTreeSet<K> {
226 fn deep_size_of_children(&self, context: &mut Context) -> usize {
227 let per_entry = size_of::<K>() + 3 * size_of::<usize>();
228 let overhead = self.len() * per_entry;
229 let children: usize = self.iter().map(|k| k.deep_size_of_children(context)).sum();
230 overhead + children
231 }
232}
233
234fn record_array_data(context: &mut Context, data: &ArrayData) -> usize {
237 let mut total = 0;
238 for buffer in data.buffers() {
239 if context.mark_seen(buffer.as_ptr() as usize) {
240 total += buffer.capacity();
241 }
242 }
243 if let Some(nulls) = data.nulls() {
244 let null_buf = nulls.inner().inner();
245 if context.mark_seen(null_buf.as_ptr() as usize) {
246 total += null_buf.capacity();
247 }
248 }
249 for child in data.child_data() {
250 total += record_array_data(context, child);
251 }
252 total
253}
254
255impl DeepSizeOf for dyn Array {
256 fn deep_size_of_children(&self, context: &mut Context) -> usize {
257 let data = self.to_data();
261 record_array_data(context, &data)
262 }
263}
264
265impl DeepSizeOf for RecordBatch {
266 fn deep_size_of_children(&self, context: &mut Context) -> usize {
267 self.columns()
268 .iter()
269 .map(|col| col.deep_size_of_children(context))
270 .sum()
271 }
272}
273
274impl<T> DeepSizeOf for arrow_buffer::ScalarBuffer<T>
275where
276 T: ArrowNativeType,
277{
278 fn deep_size_of_children(&self, context: &mut Context) -> usize {
279 let buf = self.inner();
283 if context.mark_seen(buf.as_ptr() as usize) {
284 buf.capacity()
285 } else {
286 0
287 }
288 }
289}
290
291#[cfg(test)]
292mod tests {
293 use super::*;
294 use arrow_array::{Int32Array, StringArray, StructArray};
295 use arrow_schema::{DataType, Field, Fields, Schema};
296
297 #[test]
298 fn test_basic_record_batch() {
299 let batch = RecordBatch::try_new(
300 Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
301 vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
302 )
303 .unwrap();
304
305 let size = batch.deep_size_of();
306 assert!(size >= 3 * size_of::<i32>());
308 }
309
310 #[test]
311 fn test_same_batch_dedup() {
312 let batch = RecordBatch::try_new(
313 Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
314 vec![Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5]))],
315 )
316 .unwrap();
317
318 let mut ctx = Context::new();
319 let size_a = batch.deep_size_of_children(&mut ctx);
320 let size_b = batch.deep_size_of_children(&mut ctx);
321
322 assert!(size_a > 0);
324 assert_eq!(size_b, 0);
326 }
327
328 #[test]
329 fn test_arc_dedup() {
330 let batch = Arc::new(
331 RecordBatch::try_new(
332 Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
333 vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
334 )
335 .unwrap(),
336 );
337 let clone = Arc::clone(&batch);
338
339 let mut ctx = Context::new();
340 let size_a = batch.deep_size_of_children(&mut ctx);
341 let size_b = clone.deep_size_of_children(&mut ctx);
342
343 assert!(size_a > 0);
344 assert_eq!(size_b, 0);
345 }
346
347 #[test]
348 fn test_multi_column_shared_array() {
349 let array: Arc<dyn Array> = Arc::new(Int32Array::from(vec![10, 20, 30]));
351 let schema = Arc::new(Schema::new(vec![
352 Field::new("a", DataType::Int32, false),
353 Field::new("b", DataType::Int32, false),
354 ]));
355
356 let one_col = RecordBatch::try_new(
358 Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
359 vec![array.clone()],
360 )
361 .unwrap();
362
363 let two_col = RecordBatch::try_new(schema, vec![array.clone(), array]).unwrap();
365
366 let mut ctx1 = Context::new();
367 let size_one = one_col.deep_size_of_children(&mut ctx1);
368
369 let mut ctx2 = Context::new();
370 let size_two = two_col.deep_size_of_children(&mut ctx2);
371
372 assert_eq!(size_one, size_two);
375 }
376
377 #[test]
378 fn test_nested_struct_array() {
379 let int_array = Int32Array::from(vec![1, 2, 3]);
380 let str_array = StringArray::from(vec!["a", "b", "c"]);
381 let struct_array = StructArray::from(vec![
382 (
383 Arc::new(Field::new("x", DataType::Int32, false)),
384 Arc::new(int_array) as Arc<dyn Array>,
385 ),
386 (
387 Arc::new(Field::new("y", DataType::Utf8, false)),
388 Arc::new(str_array) as Arc<dyn Array>,
389 ),
390 ]);
391
392 let batch = RecordBatch::try_new(
393 Arc::new(Schema::new(vec![Field::new(
394 "s",
395 DataType::Struct(Fields::from(vec![
396 Field::new("x", DataType::Int32, false),
397 Field::new("y", DataType::Utf8, false),
398 ])),
399 false,
400 )])),
401 vec![Arc::new(struct_array)],
402 )
403 .unwrap();
404
405 let size = batch.deep_size_of();
406 assert!(size > 3 * size_of::<i32>());
408 }
409
410 #[test]
411 fn test_std_types() {
412 assert_eq!(42u32.deep_size_of(), size_of::<u32>());
413
414 let s = String::from("hello");
415 assert!(s.deep_size_of() >= size_of::<String>() + 5);
416
417 let v = vec![1u32, 2, 3];
418 assert!(v.deep_size_of() >= size_of::<Vec<u32>>() + 3 * size_of::<u32>());
419
420 let a = Arc::new(42u32);
421 let b = Arc::clone(&a);
422 let mut ctx = Context::new();
423 let size_a = a.deep_size_of_children(&mut ctx);
424 let size_b = b.deep_size_of_children(&mut ctx);
425 assert_eq!(size_a, size_of::<u32>());
426 assert_eq!(size_b, 0);
427 }
428
429 #[test]
430 fn test_derive_macro() {
431 use lance_derive::DeepSizeOf;
432
433 #[derive(DeepSizeOf)]
434 struct Outer {
435 count: u64,
436 label: String,
437 inner: Inner,
438 }
439
440 #[derive(DeepSizeOf)]
441 struct Inner {
442 values: Vec<u32>,
443 }
444
445 let val = Outer {
446 count: 7,
447 label: String::from("hello"),
448 inner: Inner {
449 values: vec![1, 2, 3],
450 },
451 };
452
453 let size = val.deep_size_of();
454 assert!(size >= std::mem::size_of::<Outer>() + 5 + 3 * std::mem::size_of::<u32>());
456 }
457}