re_chunk_store/
stats.rs

1use std::sync::Arc;
2
3use re_byte_size::SizeBytes;
4use re_chunk::{Chunk, ComponentIdentifier, EntityPath, TimelineName};
5
6use crate::ChunkStore;
7
8// ---
9
10#[derive(Default, Debug, Clone, Copy)]
11pub struct ChunkStoreStats {
12    pub static_chunks: ChunkStoreChunkStats,
13    pub temporal_chunks: ChunkStoreChunkStats,
14}
15
16impl ChunkStoreStats {
17    #[inline]
18    pub fn total(&self) -> ChunkStoreChunkStats {
19        let Self {
20            static_chunks,
21            temporal_chunks,
22        } = *self;
23        static_chunks + temporal_chunks
24    }
25}
26
27impl std::ops::Add for ChunkStoreStats {
28    type Output = Self;
29
30    #[inline]
31    fn add(self, rhs: Self) -> Self::Output {
32        let Self {
33            static_chunks,
34            temporal_chunks,
35        } = self;
36
37        let static_chunks = static_chunks + rhs.static_chunks;
38        let temporal_chunks = temporal_chunks + rhs.temporal_chunks;
39
40        Self {
41            static_chunks,
42            temporal_chunks,
43        }
44    }
45}
46
47impl std::ops::Sub for ChunkStoreStats {
48    type Output = Self;
49
50    #[inline]
51    fn sub(self, rhs: Self) -> Self::Output {
52        let Self {
53            static_chunks,
54            temporal_chunks,
55        } = self;
56
57        let static_chunks = static_chunks - rhs.static_chunks;
58        let temporal_chunks = temporal_chunks - rhs.temporal_chunks;
59
60        Self {
61            static_chunks,
62            temporal_chunks,
63        }
64    }
65}
66
67impl ChunkStore {
68    #[inline]
69    pub fn stats(&self) -> ChunkStoreStats {
70        ChunkStoreStats {
71            static_chunks: self.static_chunks_stats,
72            temporal_chunks: self.temporal_chunks_stats,
73        }
74    }
75}
76
77// ---
78
/// Statistics describing a collection of chunks.
///
/// A single chunk only ever holds data for one entity.
///
/// A chunk has data for either zero timelines (static chunk) or multiple timelines (temporal chunk).
/// The timelines of a temporal chunk are dense.
///
/// A chunk may hold several components (columns).
#[derive(Default, Debug, Clone, Copy)]
pub struct ChunkStoreChunkStats {
    /// How many chunks these stats were computed from.
    pub num_chunks: u64,

    /// Size of everything: arrow payloads, timelines, rowids, and chunk overhead.
    ///
    /// This approximates the actual storage cost of an entity, since the
    /// measurement includes the overhead of the various data structures
    /// used in the database.
    /// It is not exact: it does not cover every possible place where something
    /// related to the entity may be stored, only most of what can be reached
    /// from inside this chunk store.
    pub total_size_bytes: u64,

    /// How many rows.
    ///
    /// Usually this matches the number of log calls made by the user.
    /// A single row can hold several events (see [`Self::num_events`]).
    pub num_rows: u64,

    /// The number of _component batches_ ("cells").
    pub num_events: u64,
}
111
112impl std::fmt::Display for ChunkStoreChunkStats {
113    #[inline]
114    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
115        let Self {
116            num_chunks,
117            total_size_bytes,
118            num_rows,
119            num_events,
120        } = *self;
121
122        f.write_fmt(format_args!(
123            "num_chunks: {}\n",
124            re_format::format_uint(num_chunks)
125        ))?;
126        f.write_fmt(format_args!(
127            "total_size_bytes: {}\n",
128            re_format::format_bytes(total_size_bytes as _)
129        ))?;
130        f.write_fmt(format_args!(
131            "num_rows: {}\n",
132            re_format::format_uint(num_rows)
133        ))?;
134        f.write_fmt(format_args!(
135            "num_events: {}\n",
136            re_format::format_uint(num_events)
137        ))?;
138
139        Ok(())
140    }
141}
142
143impl std::ops::Add for ChunkStoreChunkStats {
144    type Output = Self;
145
146    #[inline]
147    fn add(self, rhs: Self) -> Self::Output {
148        Self {
149            num_chunks: self.num_chunks + rhs.num_chunks,
150            total_size_bytes: self.total_size_bytes + rhs.total_size_bytes,
151            num_rows: self.num_rows + rhs.num_rows,
152            num_events: self.num_events + rhs.num_events,
153        }
154    }
155}
156
157impl std::ops::AddAssign for ChunkStoreChunkStats {
158    #[inline]
159    fn add_assign(&mut self, rhs: Self) {
160        *self = *self + rhs;
161    }
162}
163
164impl std::ops::Sub for ChunkStoreChunkStats {
165    type Output = Self;
166
167    #[inline]
168    fn sub(self, rhs: Self) -> Self::Output {
169        Self {
170            num_chunks: self.num_chunks - rhs.num_chunks,
171            total_size_bytes: self.total_size_bytes - rhs.total_size_bytes,
172            num_rows: self.num_rows - rhs.num_rows,
173            num_events: self.num_events - rhs.num_events,
174        }
175    }
176}
177
178impl std::ops::SubAssign for ChunkStoreChunkStats {
179    #[inline]
180    fn sub_assign(&mut self, rhs: Self) {
181        *self = *self - rhs;
182    }
183}
184
185impl std::iter::Sum for ChunkStoreChunkStats {
186    fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
187        let mut sum = Self::default();
188        for item in iter {
189            sum += item;
190        }
191        sum
192    }
193}
194
195impl ChunkStoreChunkStats {
196    #[inline]
197    pub fn from_chunk(chunk: &Arc<Chunk>) -> Self {
198        // NOTE: Do _NOT_ use `chunk.total_size_bytes` as it is sitting behind an Arc
199        // and would count as amortized (i.e. 0 bytes).
200        let size_bytes = <Chunk as SizeBytes>::total_size_bytes(&**chunk);
201
202        Self {
203            num_chunks: 1,
204            total_size_bytes: size_bytes,
205            num_rows: chunk.num_rows() as u64,
206            num_events: chunk.num_events_cumulative(),
207        }
208    }
209}
210
211// ----------------------------------------------------------------------------
212
213/// ## Entity stats
214impl ChunkStore {
215    /// Stats about all chunks with static data for an entity.
216    pub fn entity_stats_static(&self, entity_path: &EntityPath) -> ChunkStoreChunkStats {
217        re_tracing::profile_function!();
218
219        self.static_chunk_ids_per_entity
220            .get(entity_path)
221            .map_or_else(
222                ChunkStoreChunkStats::default,
223                |static_chunks_per_component| {
224                    let chunk_ids: ahash::HashSet<re_chunk::ChunkId> =
225                        static_chunks_per_component.values().copied().collect();
226
227                    chunk_ids
228                        .into_iter()
229                        .filter_map(|chunk_id| self.chunks_per_chunk_id.get(&chunk_id))
230                        .map(ChunkStoreChunkStats::from_chunk)
231                        .sum()
232                },
233            )
234    }
235
236    /// Stats about all the chunks that has data for an entity on a specific timeline.
237    ///
238    /// Does NOT include static data.
239    pub fn entity_stats_on_timeline(
240        &self,
241        entity_path: &EntityPath,
242        timeline: &TimelineName,
243    ) -> ChunkStoreChunkStats {
244        re_tracing::profile_function!();
245
246        self.temporal_chunk_ids_per_entity
247            .get(entity_path)
248            .and_then(|temporal_chunk_ids_per_timeline| {
249                temporal_chunk_ids_per_timeline.get(timeline)
250            })
251            .map_or_else(
252                ChunkStoreChunkStats::default,
253                |chunk_id_sets| -> ChunkStoreChunkStats {
254                    chunk_id_sets
255                        .per_start_time
256                        .values()
257                        .flat_map(|chunk_ids| chunk_ids.iter())
258                        .filter_map(|id| self.chunks_per_chunk_id.get(id))
259                        .map(ChunkStoreChunkStats::from_chunk)
260                        .sum()
261                },
262            )
263    }
264}
265
266/// ## Component path stats
267impl ChunkStore {
268    /// Returns the number of static events logged for an entity for a specific component.
269    ///
270    /// This ignores temporal events.
271    pub fn num_static_events_for_component(
272        &self,
273        entity_path: &EntityPath,
274        component: ComponentIdentifier,
275    ) -> u64 {
276        re_tracing::profile_function!();
277
278        self.static_chunk_ids_per_entity
279            .get(entity_path)
280            .and_then(|static_chunks_per_component| static_chunks_per_component.get(&component))
281            .and_then(|chunk_id| self.chunks_per_chunk_id.get(chunk_id))
282            .and_then(|chunk| chunk.num_events_for_component(component))
283            .unwrap_or(0)
284    }
285
286    /// Returns the number of temporal events logged for an entity for a specific component on a given timeline.
287    ///
288    /// This ignores static events.
289    pub fn num_temporal_events_for_component_on_timeline(
290        &self,
291        timeline: &TimelineName,
292        entity_path: &EntityPath,
293        component: ComponentIdentifier,
294    ) -> u64 {
295        re_tracing::profile_function!();
296
297        self.temporal_chunk_ids_per_entity_per_component
298            .get(entity_path)
299            .and_then(|temporal_chunk_ids_per_timeline| {
300                temporal_chunk_ids_per_timeline.get(timeline)
301            })
302            .and_then(|temporal_chunk_ids_per_component| {
303                temporal_chunk_ids_per_component.get(&component)
304            })
305            .map_or(0, |chunk_id_sets| {
306                chunk_id_sets
307                    .per_start_time
308                    .values()
309                    .flat_map(|chunk_ids| chunk_ids.iter())
310                    .filter_map(|chunk_id| self.chunks_per_chunk_id.get(chunk_id))
311                    .filter_map(|chunk| chunk.num_events_for_component(component))
312                    .sum()
313            })
314    }
315
316    /// Returns the number of temporal events logged for an entity for a specific component on all timelines.
317    ///
318    /// This ignores static events.
319    pub fn num_temporal_events_for_component_on_all_timelines(
320        &self,
321        entity_path: &EntityPath,
322        component: ComponentIdentifier,
323    ) -> u64 {
324        self.timelines()
325            .keys()
326            .map(|timeline| {
327                self.num_temporal_events_for_component_on_timeline(timeline, entity_path, component)
328            })
329            .sum()
330    }
331}