Skip to main content

lora_store/memory/
mem_report.rs

1//! Heap-byte breakdown of an [`InMemoryGraph`](super::InMemoryGraph).
2//!
3//! This is a *debug-only* estimator: it walks the graph's owned
4//! structures and sums an approximation of their retained heap bytes,
5//! attributed to the component that owns each allocation (node slab,
6//! relationship slab, adjacency, label/type indexes, each secondary
7//! index registry, the index catalog, the constraint catalog).
8//!
9//! The numbers it returns are *approximate*. `BTreeMap` and `HashMap`
10//! both carry node/bucket overhead that varies with allocator state,
11//! load factor, and Rust version; we use fixed amortised constants
12//! ([`BTREE_PER_ENTRY`], [`HASHMAP_PER_ENTRY`]) so that callers can
13//! diff two reports without the per-run noise an allocator-walking
14//! profiler would introduce. For property keys, we count the
15//! `Arc<str>` header but *not* the shared byte buffer — keys go
16//! through the process-wide `super::super::intern` table, so adding
17//! their content per occurrence would dramatically over-count the
18//! marginal cost a property bag actually adds to the graph.
19//!
20//! Used by `crates/lora-database/benches/memory.rs` and the
21//! `mem_probe*` examples in `crates/lora-server/examples/` to
22//! attribute observed RSS growth to a specific component without
23//! resorting to `dhat` / `heaptrack` for every run.
24
25use std::collections::BTreeMap;
26use std::mem::size_of;
27
28use crate::{LoraBinary, LoraPoint, LoraVector, NodeRecord, PropertyValue, RelationshipRecord};
29
30use super::entity_index_store::{IndexBundle, ScopedPropertyKey};
31use super::fulltext_index::FulltextRegistry;
32use super::hnsw::HnswBackend;
33use super::index_catalog::{IndexCatalog, StoredIndexEntity};
34use super::point_index::PointRegistry;
35use super::property_index::{
36    PropertyIndex, PropertyIndexKey, PropertyIndexRegistry, PropertyIndexState,
37};
38use super::sorted_property_index::SortedPropertyIndex;
39use super::text_index::TrigramRegistry;
40use super::vector_index::{FlatBackend, VectorBackend, VectorIndexRegistry};
41use super::ConstraintCatalog;
42
43/// Amortised heap overhead per `BTreeMap` entry. Empirically a BTree
44/// inner node holds ~11 entries in a ~256-byte allocation, so the
45/// per-entry share is roughly 24 bytes.
46const BTREE_PER_ENTRY: usize = 24;
47
48/// Amortised heap overhead per `HashMap` entry — bucket pointer plus
49/// hash table load factor.
50const HASHMAP_PER_ENTRY: usize = 24;
51
52/// `std::sync::Arc<T>`: strong + weak refcount header that precedes
53/// the inline `T` allocation.
54const ARC_HEADER: usize = 16;
55
56/// Aggregated retained-heap breakdown of an [`InMemoryGraph`].
57///
58/// All fields are in *bytes*. `usize` was chosen over `u64` so call
59/// sites can use `cargo bench` reports and pretty-print without casts;
60/// on 32-bit hosts a graph that overflowed this would already have
61/// failed earlier.
62#[derive(Debug, Default, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
63#[serde(rename_all = "camelCase")]
64pub struct MemoryReport {
65    pub live_node_count: usize,
66    pub live_relationship_count: usize,
67    pub node_tombstone_count: usize,
68    pub relationship_tombstone_count: usize,
69
70    /// `nodes: Vec<Option<Arc<NodeRecord>>>` — slot vector plus the
71    /// `Arc` headers plus deep payload of each live record (labels,
72    /// properties — see [`property_value_heap_bytes`]).
73    pub nodes_bytes: usize,
74    /// `relationships: Vec<Option<Arc<RelationshipRecord>>>`.
75    pub relationships_bytes: usize,
76
77    /// `outgoing: Vec<Vec<RelationshipId>>` — outer Vec capacity plus
78    /// the sum of each inner Vec's capacity in bytes.
79    pub outgoing_bytes: usize,
80    /// Same for `incoming`.
81    pub incoming_bytes: usize,
82
83    /// `nodes_by_label: BTreeMap<String, Vec<NodeId>>`.
84    pub label_index_bytes: usize,
85    /// `relationships_by_type: BTreeMap<String, Vec<RelationshipId>>`.
86    pub type_index_bytes: usize,
87
88    /// Hash-bucket property registry (`find_*_by_property`).
89    pub property_index_bytes: usize,
90    /// Sorted-range property index (`RANGE` catalog entries).
91    pub sorted_index_bytes: usize,
92    /// Trigram TEXT indexes.
93    pub text_index_bytes: usize,
94    /// Spatial grid POINT indexes.
95    pub point_index_bytes: usize,
96    /// FULLTEXT inverted indexes.
97    pub fulltext_index_bytes: usize,
98    /// VECTOR indexes (Flat and HNSW backends).
99    pub vector_index_bytes: usize,
100
101    pub index_catalog_bytes: usize,
102    pub constraint_catalog_bytes: usize,
103}
104
105impl MemoryReport {
106    /// Bytes attributable to the core graph shape: slabs, adjacency,
107    /// and the label/type indexes. This is the floor every workload
108    /// pays regardless of which secondary indexes are installed.
109    pub fn graph_core_bytes(&self) -> usize {
110        self.nodes_bytes
111            + self.relationships_bytes
112            + self.outgoing_bytes
113            + self.incoming_bytes
114            + self.label_index_bytes
115            + self.type_index_bytes
116    }
117
118    /// Bytes attributable to secondary indexes — the amount that would
119    /// be reclaimed by dropping every index.
120    pub fn secondary_index_bytes(&self) -> usize {
121        self.property_index_bytes
122            + self.sorted_index_bytes
123            + self.text_index_bytes
124            + self.point_index_bytes
125            + self.fulltext_index_bytes
126            + self.vector_index_bytes
127    }
128
129    pub fn catalog_bytes(&self) -> usize {
130        self.index_catalog_bytes + self.constraint_catalog_bytes
131    }
132
133    pub fn total_bytes(&self) -> usize {
134        self.graph_core_bytes() + self.secondary_index_bytes() + self.catalog_bytes()
135    }
136
137    /// Average retained bytes per live node, including its share of
138    /// the slab tombstone overhead. Returns 0 when there are no
139    /// nodes.
140    pub fn bytes_per_live_node(&self) -> f64 {
141        ratio(self.nodes_bytes, self.live_node_count)
142    }
143
144    pub fn bytes_per_live_relationship(&self) -> f64 {
145        ratio(self.relationships_bytes, self.live_relationship_count)
146    }
147
148    /// One-line breakdown for bench / probe output. Stable enough for
149    /// `grep`-friendly snapshot diffs across runs.
150    pub fn summary(&self) -> String {
151        format!(
152            "total={} graph={} (nodes={} rels={} out={} in={} labels={} types={}) idx={} cat={}",
153            self.total_bytes(),
154            self.graph_core_bytes(),
155            self.nodes_bytes,
156            self.relationships_bytes,
157            self.outgoing_bytes,
158            self.incoming_bytes,
159            self.label_index_bytes,
160            self.type_index_bytes,
161            self.secondary_index_bytes(),
162            self.catalog_bytes(),
163        )
164    }
165}
166
167fn ratio(num: usize, denom: usize) -> f64 {
168    if denom == 0 {
169        0.0
170    } else {
171        num as f64 / denom as f64
172    }
173}
174
175pub(super) fn estimate(graph: &super::InMemoryGraph) -> MemoryReport {
176    let mut report = MemoryReport {
177        live_node_count: graph.live_node_count,
178        live_relationship_count: graph.live_rel_count,
179        node_tombstone_count: graph.nodes.len().saturating_sub(graph.live_node_count),
180        relationship_tombstone_count: graph
181            .relationships
182            .len()
183            .saturating_sub(graph.live_rel_count),
184        ..MemoryReport::default()
185    };
186
187    report.nodes_bytes = node_slab_bytes(&graph.nodes);
188    report.relationships_bytes = rel_slab_bytes(&graph.relationships);
189    report.outgoing_bytes = adjacency_bytes(&graph.outgoing);
190    report.incoming_bytes = adjacency_bytes(&graph.incoming);
191    report.label_index_bytes = label_or_type_bytes(&graph.nodes_by_label);
192    report.type_index_bytes = label_or_type_bytes(&graph.relationships_by_type);
193
194    let bundle = &graph.indexes;
195    if let Ok(props) = bundle.properties.read() {
196        report.property_index_bytes = property_registry_bytes(&props);
197    }
198    report.sorted_index_bytes = sorted_registry_bytes(bundle);
199    report.text_index_bytes = text_registry_bytes(bundle);
200    report.point_index_bytes = point_registry_bytes(bundle);
201    report.fulltext_index_bytes = fulltext_registry_bytes(bundle);
202    report.vector_index_bytes = vector_registry_bytes(bundle);
203
204    if let Ok(catalog) = bundle.catalog.read() {
205        report.index_catalog_bytes = index_catalog_bytes(&catalog);
206    }
207    if let Ok(catalog) = graph.constraint_catalog.read() {
208        report.constraint_catalog_bytes = constraint_catalog_bytes(&catalog);
209    }
210
211    report
212}
213
214// ---------------- slab + adjacency ----------------
215
216fn node_slab_bytes(slab: &[Option<std::sync::Arc<NodeRecord>>]) -> usize {
217    let outer = std::mem::size_of_val(slab);
218    let mut payload = 0;
219    for arc in slab.iter().flatten() {
220        payload += ARC_HEADER + size_of::<NodeRecord>() + node_record_heap_bytes(arc);
221    }
222    outer + payload
223}
224
225fn rel_slab_bytes(slab: &[Option<std::sync::Arc<RelationshipRecord>>]) -> usize {
226    let outer = std::mem::size_of_val(slab);
227    let mut payload = 0;
228    for arc in slab.iter().flatten() {
229        payload += ARC_HEADER + size_of::<RelationshipRecord>() + rel_record_heap_bytes(arc);
230    }
231    outer + payload
232}
233
234fn node_record_heap_bytes(record: &NodeRecord) -> usize {
235    let labels = record.labels.capacity() * size_of::<String>()
236        + record.labels.iter().map(|l| l.capacity()).sum::<usize>();
237    labels + properties_heap_bytes(&record.properties)
238}
239
240fn rel_record_heap_bytes(record: &RelationshipRecord) -> usize {
241    record.rel_type.capacity() + properties_heap_bytes(&record.properties)
242}
243
244fn adjacency_bytes<T>(adj: &[Vec<T>]) -> usize {
245    let outer = std::mem::size_of_val(adj);
246    let inner: usize = adj.iter().map(|v| v.capacity() * size_of::<T>()).sum();
247    outer + inner
248}
249
250fn label_or_type_bytes(map: &BTreeMap<String, Vec<u64>>) -> usize {
251    let mut total = 0;
252    for (key, ids) in map {
253        total += BTREE_PER_ENTRY
254            + size_of::<String>()
255            + key.capacity()
256            + size_of::<Vec<u64>>()
257            + ids.capacity() * size_of::<u64>();
258    }
259    total
260}
261
262// ---------------- property values ----------------
263
264/// Bytes beyond the embedded discriminant of a [`PropertyValue`]. The
265/// embedded discriminant itself is counted by the caller (it sits
266/// inside whatever container owns the value).
267pub fn property_value_heap_bytes(value: &PropertyValue) -> usize {
268    match value {
269        PropertyValue::Null
270        | PropertyValue::Bool(_)
271        | PropertyValue::Int(_)
272        | PropertyValue::Float(_)
273        | PropertyValue::Date(_)
274        | PropertyValue::Time(_)
275        | PropertyValue::LocalTime(_)
276        | PropertyValue::LocalDateTime(_)
277        | PropertyValue::DateTime(_)
278        | PropertyValue::Duration(_) => 0,
279        PropertyValue::String(s) => s.capacity(),
280        PropertyValue::Binary(b) => binary_heap_bytes(b),
281        PropertyValue::Point(p) => point_heap_bytes(p),
282        PropertyValue::Vector(v) => vector_heap_bytes(v),
283        PropertyValue::List(items) => {
284            items.capacity() * size_of::<PropertyValue>()
285                + items.iter().map(property_value_heap_bytes).sum::<usize>()
286        }
287        PropertyValue::Map(map) => {
288            let mut total = 0;
289            for (key, value) in map {
290                total += BTREE_PER_ENTRY
291                    + size_of::<String>()
292                    + key.capacity()
293                    + size_of::<PropertyValue>()
294                    + property_value_heap_bytes(value);
295            }
296            total
297        }
298    }
299}
300
301fn binary_heap_bytes(b: &LoraBinary) -> usize {
302    let mut total = std::mem::size_of_val(b.segments());
303    for seg in b.segments() {
304        total += seg.capacity();
305    }
306    total
307}
308
309fn point_heap_bytes(_: &LoraPoint) -> usize {
310    // LoraPoint is fixed-size (3 × f64 + tag + srid). No owned heap.
311    0
312}
313
314fn vector_heap_bytes(v: &LoraVector) -> usize {
315    use crate::types::VectorValues;
316    match &v.values {
317        VectorValues::Float64(xs) => xs.capacity() * size_of::<f64>(),
318        VectorValues::Float32(xs) => xs.capacity() * size_of::<f32>(),
319        VectorValues::Integer64(xs) => xs.capacity() * size_of::<i64>(),
320        VectorValues::Integer32(xs) => xs.capacity() * size_of::<i32>(),
321        VectorValues::Integer16(xs) => xs.capacity() * size_of::<i16>(),
322        VectorValues::Integer8(xs) => xs.capacity() * size_of::<i8>(),
323    }
324}
325
326fn properties_heap_bytes(properties: &crate::Properties) -> usize {
327    let mut total = 0;
328    for value in properties.values() {
329        total += BTREE_PER_ENTRY
330            + ARC_HEADER
331            + size_of::<std::sync::Arc<str>>()
332            + size_of::<PropertyValue>()
333            + property_value_heap_bytes(value);
334    }
335    total
336}
337
338// ---------------- secondary indexes ----------------
339
340fn property_registry_bytes(reg: &PropertyIndexRegistry) -> usize {
341    property_state_bytes(&reg.node_properties) + property_state_bytes(&reg.relationship_properties)
342}
343
344fn property_state_bytes(state: &PropertyIndexState) -> usize {
345    let mut total = 0;
346    for key in &state.active_keys {
347        total += BTREE_PER_ENTRY + size_of::<String>() + key.capacity();
348    }
349    total += property_index_map_bytes(&state.values);
350    for (scope, by_property) in &state.scoped_values {
351        total += HASHMAP_PER_ENTRY + size_of::<String>() + scope.capacity();
352        total += property_index_map_bytes(by_property);
353    }
354    total
355}
356
357fn property_index_map_bytes(values: &PropertyIndex) -> usize {
358    let mut total = 0;
359    for (key, buckets) in values {
360        total += HASHMAP_PER_ENTRY + size_of::<String>() + key.capacity();
361        for (indexed, ids) in buckets {
362            total += HASHMAP_PER_ENTRY
363                + property_index_key_bytes(indexed)
364                + size_of::<Vec<u64>>()
365                + ids.capacity() * size_of::<u64>();
366        }
367    }
368    total
369}
370
371fn property_index_key_bytes(key: &PropertyIndexKey) -> usize {
372    size_of::<PropertyIndexKey>()
373        + match key {
374            PropertyIndexKey::Null
375            | PropertyIndexKey::Bool(_)
376            | PropertyIndexKey::Int(_)
377            | PropertyIndexKey::Float(_) => 0,
378            PropertyIndexKey::String(s) => s.capacity(),
379            PropertyIndexKey::Binary(b) => binary_heap_bytes(b),
380            PropertyIndexKey::List(items) => {
381                items.capacity() * size_of::<PropertyIndexKey>()
382                    + items.iter().map(property_index_key_bytes).sum::<usize>()
383            }
384            PropertyIndexKey::Map(map) => map
385                .iter()
386                .map(|(k, v)| {
387                    BTREE_PER_ENTRY
388                        + size_of::<String>()
389                        + k.capacity()
390                        + property_index_key_bytes(v)
391                })
392                .sum::<usize>(),
393        }
394}
395
396fn scoped_key_bytes(scope: &ScopedPropertyKey) -> usize {
397    size_of::<ScopedPropertyKey>() + scope.label.capacity() + scope.property.capacity()
398}
399
400fn sorted_registry_bytes(bundle: &IndexBundle) -> usize {
401    let node = bundle.sorted.read(StoredIndexEntity::Node);
402    let rel = bundle.sorted.read(StoredIndexEntity::Relationship);
403    sorted_one(&node) + sorted_one(&rel)
404}
405
406fn sorted_one(index: &SortedPropertyIndex) -> usize {
407    let mut total = 0;
408    for (scope, sorted_scope) in &index.by_scope {
409        total += BTREE_PER_ENTRY + scoped_key_bytes(scope);
410        for (indexed, ids) in &sorted_scope.by_value {
411            total += BTREE_PER_ENTRY
412                + property_index_key_bytes(indexed)
413                + ids.len() * (BTREE_PER_ENTRY + size_of::<u64>());
414        }
415    }
416    total
417}
418
419fn text_registry_bytes(bundle: &IndexBundle) -> usize {
420    let node = bundle.text.read(StoredIndexEntity::Node);
421    let rel = bundle.text.read(StoredIndexEntity::Relationship);
422    text_one(&node) + text_one(&rel)
423}
424
425fn text_one(registry: &TrigramRegistry) -> usize {
426    let mut total = 0;
427    for (scope, trigram_scope) in &registry.by_scope {
428        total += HASHMAP_PER_ENTRY + scoped_key_bytes(scope);
429        for ids in trigram_scope.grams.values() {
430            total += BTREE_PER_ENTRY + 3 + ids.len() * (BTREE_PER_ENTRY + size_of::<u64>());
431        }
432    }
433    total
434}
435
436fn point_registry_bytes(bundle: &IndexBundle) -> usize {
437    let node = bundle.point.read(StoredIndexEntity::Node);
438    let rel = bundle.point.read(StoredIndexEntity::Relationship);
439    point_one(&node) + point_one(&rel)
440}
441
442fn point_one(registry: &PointRegistry) -> usize {
443    let mut total = 0;
444    for (scope, scope_data) in &registry.by_scope {
445        total += HASHMAP_PER_ENTRY + scoped_key_bytes(scope);
446        for cell in scope_data.grid.cells.values() {
447            total += HASHMAP_PER_ENTRY
448                + size_of::<Vec<(LoraPoint, u64)>>()
449                + cell.capacity() * size_of::<(LoraPoint, u64)>();
450        }
451    }
452    total
453}
454
455fn fulltext_registry_bytes(bundle: &IndexBundle) -> usize {
456    let node = bundle.fulltext.read(StoredIndexEntity::Node);
457    let rel = bundle.fulltext.read(StoredIndexEntity::Relationship);
458    fulltext_one(&node) + fulltext_one(&rel)
459}
460
461fn fulltext_one(registry: &FulltextRegistry) -> usize {
462    let mut total = 0;
463    for (name, index) in registry.iter() {
464        total += HASHMAP_PER_ENTRY + size_of::<String>() + name.capacity();
465        total += index.labels.capacity() * size_of::<String>();
466        for label in &index.labels {
467            total += label.capacity();
468        }
469        total += index.properties.capacity() * size_of::<String>();
470        for property in &index.properties {
471            total += property.capacity();
472        }
473        for (term, postings) in &index.postings {
474            total += BTREE_PER_ENTRY + size_of::<String>() + term.capacity();
475            total += postings.len() * (BTREE_PER_ENTRY + size_of::<u64>() + size_of::<u32>());
476        }
477        for terms in index.entity_terms.values() {
478            total += BTREE_PER_ENTRY + size_of::<u64>();
479            for term in terms {
480                total += BTREE_PER_ENTRY + size_of::<String>() + term.capacity();
481            }
482        }
483    }
484    total
485}
486
487fn vector_registry_bytes(bundle: &IndexBundle) -> usize {
488    let node = bundle.vector.read(StoredIndexEntity::Node);
489    let rel = bundle.vector.read(StoredIndexEntity::Relationship);
490    vector_one(&node) + vector_one(&rel)
491}
492
493fn vector_one(registry: &VectorIndexRegistry) -> usize {
494    let mut total = 0;
495    for (name, entry) in &registry.by_name {
496        total += BTREE_PER_ENTRY
497            + size_of::<String>()
498            + name.capacity()
499            + entry.label.capacity()
500            + entry.property.capacity();
501        total += match &entry.backend {
502            VectorBackend::Flat(b) => flat_backend_bytes(b),
503            VectorBackend::Hnsw(b) => hnsw_backend_bytes(b),
504        };
505    }
506    total
507}
508
509fn flat_backend_bytes(b: &FlatBackend) -> usize {
510    let mut total = 0;
511    for v in b.items.values() {
512        total +=
513            BTREE_PER_ENTRY + size_of::<u64>() + size_of::<LoraVector>() + vector_heap_bytes(v);
514    }
515    total
516}
517
518fn hnsw_backend_bytes(b: &HnswBackend) -> usize {
519    let mut total = 0;
520    for node in b.nodes.values() {
521        total += BTREE_PER_ENTRY
522            + size_of::<u64>()
523            + size_of::<LoraVector>()
524            + vector_heap_bytes(&node.vector)
525            + size_of::<usize>() // level
526            + node.neighbors.capacity() * size_of::<Vec<u64>>();
527        for layer in &node.neighbors {
528            total += layer.capacity() * size_of::<u64>();
529        }
530    }
531    total
532}
533
534// ---------------- catalogs ----------------
535
536fn index_catalog_bytes(catalog: &IndexCatalog) -> usize {
537    let mut total = 0;
538    for def in catalog.list() {
539        total += BTREE_PER_ENTRY + size_of::<String>() + def.name.capacity();
540        if let Some(label) = &def.label {
541            total += label.capacity();
542        }
543        for label in &def.additional_labels {
544            total += size_of::<String>() + label.capacity();
545        }
546        for property in &def.properties {
547            total += size_of::<String>() + property.capacity();
548        }
549        for key in def.options.keys() {
550            total += BTREE_PER_ENTRY + size_of::<String>() + key.capacity();
551        }
552    }
553    total
554}
555
556fn constraint_catalog_bytes(catalog: &ConstraintCatalog) -> usize {
557    let mut total = 0;
558    for def in catalog.list() {
559        total += BTREE_PER_ENTRY + size_of::<String>() + def.name.capacity() + def.label.capacity();
560        for property in &def.properties {
561            total += size_of::<String>() + property.capacity();
562        }
563        if let Some(idx) = &def.owned_index {
564            total += idx.capacity();
565        }
566    }
567    total
568}