Skip to main content

nodedb_graph/csr/index/
types.rs

1//! `CsrIndex` struct definition and constructor.
2//!
3//! Memory layout at scale (1B edges):
4//! - Old: `Vec<Vec<(String, u32)>>` ≈ 60 GB (heap String per edge)
5//! - New: contiguous `Vec<u32>` offsets + targets + `Vec<u32>` labels ≈ 12 GB
6//!
7//! Writes accumulate in a mutable buffer (`buffer_out`/`buffer_in`).
8//! Reads check both the dense CSR arrays and the mutable buffer.
9//! `compact()` merges the buffer into the dense arrays (double-buffered swap).
10//!
11//! ## Edge Weights
12//!
13//! Optional `f64` weight per edge stored in parallel arrays. `None` when the
14//! graph is entirely unweighted (zero memory overhead). Populated from the
15//! `"weight"` edge property at insertion time. Unweighted edges default to 1.0.
16
17use std::collections::{HashMap, HashSet};
18
19use crate::csr::dense_array::DenseArray;
20
21// Re-export shared Direction from nodedb-types.
22pub use nodedb_types::graph::Direction;
23
24/// Dense integer CSR adjacency index with interned node IDs and labels.
25pub struct CsrIndex {
26    // ── Node interning ──
27    pub(crate) node_to_id: HashMap<String, u32>,
28    pub(crate) id_to_node: Vec<String>,
29
30    // ── Label interning ──
31    pub(crate) label_to_id: HashMap<String, u32>,
32    pub(crate) id_to_label: Vec<String>,
33
34    // ── Dense CSR (read-only between compactions) ──
35    //
36    // Offsets are `Vec<u32>` (mutable — extended on node creation).
37    // Targets/labels/weights use `DenseArray<T>` for zero-copy mmap support:
38    // after cold start from rkyv checkpoint, these point directly into the
39    // archived buffer with no deserialization. Compaction replaces them with
40    // owned Vecs.
41    /// `out_offsets[i]..out_offsets[i+1]` = range in `out_targets`/`out_labels`.
42    /// Length: `num_nodes + 1`.
43    pub(crate) out_offsets: Vec<u32>,
44    pub(crate) out_targets: DenseArray<u32>,
45    pub(crate) out_labels: DenseArray<u32>,
46    /// Parallel edge weight array. `None` if graph has no weighted edges.
47    pub(crate) out_weights: Option<DenseArray<f64>>,
48
49    pub(crate) in_offsets: Vec<u32>,
50    pub(crate) in_targets: DenseArray<u32>,
51    pub(crate) in_labels: DenseArray<u32>,
52    /// Parallel inbound edge weight array. `None` if graph has no weighted edges.
53    pub(crate) in_weights: Option<DenseArray<f64>>,
54
55    // ── Mutable write buffer ──
56    /// Per-node outbound buffer: `buffer_out[node_id]` = `[(label_id, dst_id)]`.
57    pub(crate) buffer_out: Vec<Vec<(u32, u32)>>,
58    pub(crate) buffer_in: Vec<Vec<(u32, u32)>>,
59    /// Per-node outbound weight buffer (parallel to `buffer_out`).
60    /// Only populated when `has_weights` is true.
61    pub(crate) buffer_out_weights: Vec<Vec<f64>>,
62    /// Per-node inbound weight buffer (parallel to `buffer_in`).
63    pub(crate) buffer_in_weights: Vec<Vec<f64>>,
64
65    /// Edges deleted since last compaction: `(src, label, dst)`.
66    pub(crate) deleted_edges: HashSet<(u32, u32, u32)>,
67
68    /// Whether any edge has a non-default weight. When false, weight arrays
69    /// are `None` and weight buffers are empty — zero overhead for unweighted graphs.
70    pub(crate) has_weights: bool,
71
72    // ── Node labels (bitset) ──
73    //
74    // Each node has a `u64` bitset where bit `i` corresponds to label ID `i`.
75    // Supports up to 64 distinct node labels. Labels are interned in
76    // `node_label_to_id` / `node_label_names` (separate from edge labels).
77    // Used by MATCH pattern `(a:Person)` — filters nodes by label membership.
78    pub(crate) node_label_bits: Vec<u64>,
79    pub(crate) node_label_to_id: HashMap<String, u8>,
80    pub(crate) node_label_names: Vec<String>,
81
82    // ── Hot/cold access tracking ──
83    /// Per-node access counter: incremented on each neighbor/BFS/path query.
84    /// Uses `Cell<u32>` so access can be tracked through `&self` references
85    /// (traversal methods are `&self` for shared read access).
86    pub(crate) access_counts: Vec<std::cell::Cell<u32>>,
87    /// Total queries served since last access counter reset.
88    pub(crate) query_epoch: u64,
89}
90
91impl Default for CsrIndex {
92    fn default() -> Self {
93        Self::new()
94    }
95}
96
97impl CsrIndex {
98    pub fn new() -> Self {
99        Self {
100            node_to_id: HashMap::new(),
101            id_to_node: Vec::new(),
102            label_to_id: HashMap::new(),
103            id_to_label: Vec::new(),
104            out_offsets: vec![0],
105            out_targets: DenseArray::default(),
106            out_labels: DenseArray::default(),
107            out_weights: None,
108            in_offsets: vec![0],
109            in_targets: DenseArray::default(),
110            in_labels: DenseArray::default(),
111            in_weights: None,
112            buffer_out: Vec::new(),
113            buffer_in: Vec::new(),
114            buffer_out_weights: Vec::new(),
115            buffer_in_weights: Vec::new(),
116            deleted_edges: HashSet::new(),
117            has_weights: false,
118            node_label_bits: Vec::new(),
119            node_label_to_id: HashMap::new(),
120            node_label_names: Vec::new(),
121            access_counts: Vec::new(),
122            query_epoch: 0,
123        }
124    }
125}