nodedb_graph/csr/index/types.rs
1//! `CsrIndex` struct definition and constructor.
2//!
3//! Memory layout at scale (1B edges):
4//! - Old: `Vec<Vec<(String, u32)>>` ≈ 60 GB (heap String per edge)
5//! - New: contiguous `Vec<u32>` offsets + targets + `Vec<u32>` labels ≈ 12 GB
6//!
7//! Writes accumulate in a mutable buffer (`buffer_out`/`buffer_in`).
8//! Reads check both the dense CSR arrays and the mutable buffer.
9//! `compact()` merges the buffer into the dense arrays (double-buffered swap).
10//!
11//! ## Edge Weights
12//!
13//! Optional `f64` weight per edge stored in parallel arrays. `None` when the
14//! graph is entirely unweighted (zero memory overhead). Populated from the
15//! `"weight"` edge property at insertion time. Unweighted edges default to 1.0.
16
17use std::collections::{HashMap, HashSet};
18
19use crate::csr::dense_array::DenseArray;
20
21// Re-export shared Direction from nodedb-types.
22pub use nodedb_types::graph::Direction;
23
24/// Dense integer CSR adjacency index with interned node IDs and labels.
25pub struct CsrIndex {
26 // ── Node interning ──
27 pub(crate) node_to_id: HashMap<String, u32>,
28 pub(crate) id_to_node: Vec<String>,
29
30 // ── Label interning ──
31 pub(crate) label_to_id: HashMap<String, u32>,
32 pub(crate) id_to_label: Vec<String>,
33
34 // ── Dense CSR (read-only between compactions) ──
35 //
36 // Offsets are `Vec<u32>` (mutable — extended on node creation).
37 // Targets/labels/weights use `DenseArray<T>` for zero-copy mmap support:
38 // after cold start from rkyv checkpoint, these point directly into the
39 // archived buffer with no deserialization. Compaction replaces them with
40 // owned Vecs.
41 /// `out_offsets[i]..out_offsets[i+1]` = range in `out_targets`/`out_labels`.
42 /// Length: `num_nodes + 1`.
43 pub(crate) out_offsets: Vec<u32>,
44 pub(crate) out_targets: DenseArray<u32>,
45 pub(crate) out_labels: DenseArray<u32>,
46 /// Parallel edge weight array. `None` if graph has no weighted edges.
47 pub(crate) out_weights: Option<DenseArray<f64>>,
48
49 pub(crate) in_offsets: Vec<u32>,
50 pub(crate) in_targets: DenseArray<u32>,
51 pub(crate) in_labels: DenseArray<u32>,
52 /// Parallel inbound edge weight array. `None` if graph has no weighted edges.
53 pub(crate) in_weights: Option<DenseArray<f64>>,
54
55 // ── Mutable write buffer ──
56 /// Per-node outbound buffer: `buffer_out[node_id]` = `[(label_id, dst_id)]`.
57 pub(crate) buffer_out: Vec<Vec<(u32, u32)>>,
58 pub(crate) buffer_in: Vec<Vec<(u32, u32)>>,
59 /// Per-node outbound weight buffer (parallel to `buffer_out`).
60 /// Only populated when `has_weights` is true.
61 pub(crate) buffer_out_weights: Vec<Vec<f64>>,
62 /// Per-node inbound weight buffer (parallel to `buffer_in`).
63 pub(crate) buffer_in_weights: Vec<Vec<f64>>,
64
65 /// Edges deleted since last compaction: `(src, label, dst)`.
66 pub(crate) deleted_edges: HashSet<(u32, u32, u32)>,
67
68 /// Whether any edge has a non-default weight. When false, weight arrays
69 /// are `None` and weight buffers are empty — zero overhead for unweighted graphs.
70 pub(crate) has_weights: bool,
71
72 // ── Node labels (bitset) ──
73 //
74 // Each node has a `u64` bitset where bit `i` corresponds to label ID `i`.
75 // Supports up to 64 distinct node labels. Labels are interned in
76 // `node_label_to_id` / `node_label_names` (separate from edge labels).
77 // Used by MATCH pattern `(a:Person)` — filters nodes by label membership.
78 pub(crate) node_label_bits: Vec<u64>,
79 pub(crate) node_label_to_id: HashMap<String, u8>,
80 pub(crate) node_label_names: Vec<String>,
81
82 // ── Hot/cold access tracking ──
83 /// Per-node access counter: incremented on each neighbor/BFS/path query.
84 /// Uses `Cell<u32>` so access can be tracked through `&self` references
85 /// (traversal methods are `&self` for shared read access).
86 pub(crate) access_counts: Vec<std::cell::Cell<u32>>,
87 /// Total queries served since last access counter reset.
88 pub(crate) query_epoch: u64,
89}
90
91impl Default for CsrIndex {
92 fn default() -> Self {
93 Self::new()
94 }
95}
96
97impl CsrIndex {
98 pub fn new() -> Self {
99 Self {
100 node_to_id: HashMap::new(),
101 id_to_node: Vec::new(),
102 label_to_id: HashMap::new(),
103 id_to_label: Vec::new(),
104 out_offsets: vec![0],
105 out_targets: DenseArray::default(),
106 out_labels: DenseArray::default(),
107 out_weights: None,
108 in_offsets: vec![0],
109 in_targets: DenseArray::default(),
110 in_labels: DenseArray::default(),
111 in_weights: None,
112 buffer_out: Vec::new(),
113 buffer_in: Vec::new(),
114 buffer_out_weights: Vec::new(),
115 buffer_in_weights: Vec::new(),
116 deleted_edges: HashSet::new(),
117 has_weights: false,
118 node_label_bits: Vec::new(),
119 node_label_to_id: HashMap::new(),
120 node_label_names: Vec::new(),
121 access_counts: Vec::new(),
122 query_epoch: 0,
123 }
124 }
125}