re_chunk_store/
stats.rs

1use std::sync::Arc;
2
3use re_byte_size::SizeBytes;
4use re_chunk::{Chunk, ComponentIdentifier, EntityPath, TimelineName};
5
6use crate::ChunkStore;
7
8// ---
9
10#[derive(Default, Debug, Clone, Copy)]
11pub struct ChunkStoreStats {
12    pub static_chunks: ChunkStoreChunkStats,
13    pub temporal_chunks: ChunkStoreChunkStats,
14}
15
16impl ChunkStoreStats {
17    #[inline]
18    pub fn total(&self) -> ChunkStoreChunkStats {
19        let Self {
20            static_chunks,
21            temporal_chunks,
22        } = *self;
23        static_chunks + temporal_chunks
24    }
25}
26
27impl std::ops::Add for ChunkStoreStats {
28    type Output = Self;
29
30    #[inline]
31    fn add(self, rhs: Self) -> Self::Output {
32        let Self {
33            static_chunks,
34            temporal_chunks,
35        } = self;
36
37        let static_chunks = static_chunks + rhs.static_chunks;
38        let temporal_chunks = temporal_chunks + rhs.temporal_chunks;
39
40        Self {
41            static_chunks,
42            temporal_chunks,
43        }
44    }
45}
46
47impl std::ops::Sub for ChunkStoreStats {
48    type Output = Self;
49
50    #[inline]
51    fn sub(self, rhs: Self) -> Self::Output {
52        let Self {
53            static_chunks,
54            temporal_chunks,
55        } = self;
56
57        let static_chunks = static_chunks - rhs.static_chunks;
58        let temporal_chunks = temporal_chunks - rhs.temporal_chunks;
59
60        Self {
61            static_chunks,
62            temporal_chunks,
63        }
64    }
65}
66
67impl ChunkStore {
68    #[inline]
69    pub fn stats(&self) -> ChunkStoreStats {
70        ChunkStoreStats {
71            static_chunks: self.static_chunks_stats,
72            temporal_chunks: self.temporal_chunks_stats,
73        }
74    }
75}
76
77// ---
78
/// Aggregated stats for a collection of chunks.
///
/// A chunk only ever contains data for a single entity.
///
/// A chunk has data for either zero timelines (static chunk) or multiple timelines
/// (temporal chunk). The timelines of a temporal chunk are dense.
///
/// A chunk can contain multiple components (columns).
#[derive(Clone, Copy, Debug, Default)]
pub struct ChunkStoreChunkStats {
    /// How many chunks these stats cover.
    pub num_chunks: u64,

    /// Size in bytes, everything included: arrow payloads, timelines, rowids, and chunk overhead.
    ///
    /// This only approximates the actual storage cost of an entity: the measurement does
    /// include the overhead of the various data structures used in the database, but it
    /// does not account for every possible place where something related to the entity
    /// may be stored, only for most of what is accessible inside this chunk store.
    pub total_size_bytes: u64,

    /// How many rows.
    ///
    /// This is usually the same as the number of log calls the user made.
    /// A single row can contain multiple events (see [`Self::num_events`]).
    pub num_rows: u64,

    /// How many _component batches_ ("cells").
    pub num_events: u64,
}
111
112impl std::fmt::Display for ChunkStoreChunkStats {
113    #[inline]
114    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
115        let Self {
116            num_chunks,
117            total_size_bytes,
118            num_rows,
119            num_events,
120        } = *self;
121
122        f.write_fmt(format_args!(
123            "num_chunks: {}\n",
124            re_format::format_uint(num_chunks)
125        ))?;
126        f.write_fmt(format_args!(
127            "total_size_bytes: {}\n",
128            re_format::format_bytes(total_size_bytes as _)
129        ))?;
130        f.write_fmt(format_args!(
131            "num_rows: {}\n",
132            re_format::format_uint(num_rows)
133        ))?;
134        f.write_fmt(format_args!(
135            "num_events: {}\n",
136            re_format::format_uint(num_events)
137        ))?;
138
139        Ok(())
140    }
141}
142
143impl std::ops::Add for ChunkStoreChunkStats {
144    type Output = Self;
145
146    #[inline]
147    fn add(self, rhs: Self) -> Self::Output {
148        Self {
149            num_chunks: self.num_chunks + rhs.num_chunks,
150            total_size_bytes: self.total_size_bytes + rhs.total_size_bytes,
151            num_rows: self.num_rows + rhs.num_rows,
152            num_events: self.num_events + rhs.num_events,
153        }
154    }
155}
156
157impl std::ops::AddAssign for ChunkStoreChunkStats {
158    #[inline]
159    fn add_assign(&mut self, rhs: Self) {
160        *self = *self + rhs;
161    }
162}
163
164impl std::ops::Sub for ChunkStoreChunkStats {
165    type Output = Self;
166
167    #[inline]
168    fn sub(self, rhs: Self) -> Self::Output {
169        Self {
170            num_chunks: self.num_chunks - rhs.num_chunks,
171            total_size_bytes: self.total_size_bytes - rhs.total_size_bytes,
172            num_rows: self.num_rows - rhs.num_rows,
173            num_events: self.num_events - rhs.num_events,
174        }
175    }
176}
177
178impl std::ops::SubAssign for ChunkStoreChunkStats {
179    #[inline]
180    fn sub_assign(&mut self, rhs: Self) {
181        *self = *self - rhs;
182    }
183}
184
185impl std::iter::Sum for ChunkStoreChunkStats {
186    fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
187        let mut sum = Self::default();
188        for item in iter {
189            sum += item;
190        }
191        sum
192    }
193}
194
195impl ChunkStoreChunkStats {
196    #[inline]
197    pub fn from_chunk(chunk: &Arc<Chunk>) -> Self {
198        // NOTE: Do _NOT_ use `chunk.total_size_bytes` as it is sitting behind an Arc
199        // and would count as amortized (i.e. 0 bytes).
200        let size_bytes = <Chunk as SizeBytes>::total_size_bytes(&**chunk);
201
202        Self {
203            num_chunks: 1,
204            total_size_bytes: size_bytes,
205            num_rows: chunk.num_rows() as u64,
206            num_events: chunk.num_events_cumulative(),
207        }
208    }
209}
210
211// ----------------------------------------------------------------------------
212
213/// ## Entity stats
214impl ChunkStore {
215    /// Stats about all chunks with static data for an entity.
216    pub fn entity_stats_static(&self, entity_path: &EntityPath) -> ChunkStoreChunkStats {
217        re_tracing::profile_function!();
218
219        self.static_chunk_ids_per_entity.get(entity_path).map_or(
220            ChunkStoreChunkStats::default(),
221            |static_chunks_per_component| {
222                let chunk_ids: ahash::HashSet<re_chunk::ChunkId> =
223                    static_chunks_per_component.values().copied().collect();
224
225                chunk_ids
226                    .into_iter()
227                    .filter_map(|chunk_id| self.chunks_per_chunk_id.get(&chunk_id))
228                    .map(ChunkStoreChunkStats::from_chunk)
229                    .sum()
230            },
231        )
232    }
233
234    /// Stats about all the chunks that has data for an entity on a specific timeline.
235    ///
236    /// Does NOT include static data.
237    pub fn entity_stats_on_timeline(
238        &self,
239        entity_path: &EntityPath,
240        timeline: &TimelineName,
241    ) -> ChunkStoreChunkStats {
242        re_tracing::profile_function!();
243
244        self.temporal_chunk_ids_per_entity
245            .get(entity_path)
246            .and_then(|temporal_chunk_ids_per_timeline| {
247                temporal_chunk_ids_per_timeline.get(timeline)
248            })
249            .map_or(
250                ChunkStoreChunkStats::default(),
251                |chunk_id_sets| -> ChunkStoreChunkStats {
252                    chunk_id_sets
253                        .per_start_time
254                        .values()
255                        .flat_map(|chunk_ids| chunk_ids.iter())
256                        .filter_map(|id| self.chunks_per_chunk_id.get(id))
257                        .map(ChunkStoreChunkStats::from_chunk)
258                        .sum()
259                },
260            )
261    }
262}
263
264/// ## Component path stats
265impl ChunkStore {
266    /// Returns the number of static events logged for an entity for a specific component.
267    ///
268    /// This ignores temporal events.
269    pub fn num_static_events_for_component(
270        &self,
271        entity_path: &EntityPath,
272        component: ComponentIdentifier,
273    ) -> u64 {
274        re_tracing::profile_function!();
275
276        self.static_chunk_ids_per_entity
277            .get(entity_path)
278            .and_then(|static_chunks_per_component| static_chunks_per_component.get(&component))
279            .and_then(|chunk_id| self.chunks_per_chunk_id.get(chunk_id))
280            .and_then(|chunk| chunk.num_events_for_component(component))
281            .unwrap_or(0)
282    }
283
284    /// Returns the number of temporal events logged for an entity for a specific component on a given timeline.
285    ///
286    /// This ignores static events.
287    pub fn num_temporal_events_for_component_on_timeline(
288        &self,
289        timeline: &TimelineName,
290        entity_path: &EntityPath,
291        component: ComponentIdentifier,
292    ) -> u64 {
293        re_tracing::profile_function!();
294
295        self.temporal_chunk_ids_per_entity_per_component
296            .get(entity_path)
297            .and_then(|temporal_chunk_ids_per_timeline| {
298                temporal_chunk_ids_per_timeline.get(timeline)
299            })
300            .and_then(|temporal_chunk_ids_per_component| {
301                temporal_chunk_ids_per_component.get(&component)
302            })
303            .map_or(0, |chunk_id_sets| {
304                chunk_id_sets
305                    .per_start_time
306                    .values()
307                    .flat_map(|chunk_ids| chunk_ids.iter())
308                    .filter_map(|chunk_id| self.chunks_per_chunk_id.get(chunk_id))
309                    .filter_map(|chunk| chunk.num_events_for_component(component))
310                    .sum()
311            })
312    }
313
314    /// Returns the number of temporal events logged for an entity for a specific component on all timelines.
315    ///
316    /// This ignores static events.
317    pub fn num_temporal_events_for_component_on_all_timelines(
318        &self,
319        entity_path: &EntityPath,
320        component: ComponentIdentifier,
321    ) -> u64 {
322        self.timelines()
323            .keys()
324            .map(|timeline| {
325                self.num_temporal_events_for_component_on_timeline(timeline, entity_path, component)
326            })
327            .sum()
328    }
329}