Skip to main content

uni_store/storage/
csr.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4//! Compressed Sparse Row (CSR) representation of adjacency.
5//!
6//! In the new storage model, CSR uses DenseIdx for internal indexing.
7//! VidRemapper handles the mapping between sparse VIDs and dense indices.
8
9use crate::runtime::VidRemapper;
10use uni_common::core::id::{DenseIdx, Eid, Vid};
11
12/// Compressed Sparse Row (CSR) representation of adjacency
13///
14/// Optimized for:
15/// 1. Low memory footprint (offsets are u32)
16/// 2. O(1) neighbor lookup using DenseIdx
17/// 3. CPU cache locality (contiguous memory)
18///
19/// Note: CSR uses DenseIdx internally. Use VidRemapper to convert Vid ↔ DenseIdx.
20#[derive(Clone)]
21pub struct CompressedSparseRow {
22    /// Offset into neighbors for vertex i: offsets[i]..offsets[i+1]
23    /// Index by DenseIdx
24    offsets: Vec<u32>,
25
26    /// Flattened neighbor list (stored as DenseIdx for internal operations)
27    neighbors: Vec<DenseIdx>,
28
29    /// Neighbor VIDs (for when caller needs actual VIDs)
30    neighbor_vids: Vec<Vid>,
31
32    /// Edge IDs parallel to neighbors
33    edge_ids: Vec<Eid>,
34}
35
36impl CompressedSparseRow {
37    /// Creates a new CSR from a list of edges.
38    ///
39    /// # Arguments
40    /// * `entries` - List of (src_vid, dst_vid, eid) tuples
41    /// * `remapper` - Mutable remapper that will be populated with all VIDs
42    ///
43    /// Returns the CSR with all vertices remapped to dense indices.
44    pub fn from_edges(entries: Vec<(Vid, Vid, Eid)>, remapper: &mut VidRemapper) -> Self {
45        if entries.is_empty() {
46            return Self {
47                offsets: vec![0],
48                neighbors: Vec::new(),
49                neighbor_vids: Vec::new(),
50                edge_ids: Vec::new(),
51            };
52        }
53
54        // First pass: insert all VIDs into remapper
55        for (src, dst, _) in &entries {
56            remapper.insert(*src);
57            remapper.insert(*dst);
58        }
59
60        // Convert to (dense_src, dst_vid, eid) and sort by src
61        let mut edges: Vec<(DenseIdx, Vid, Eid)> = entries
62            .iter()
63            .map(|(src, dst, eid)| (remapper.to_dense(*src).unwrap(), *dst, *eid))
64            .collect();
65
66        edges.sort_by_key(|(src, _, _)| *src);
67
68        let max_dense = remapper.len();
69        let mut offsets = vec![0u32; max_dense + 1];
70        let mut neighbors = Vec::with_capacity(edges.len());
71        let mut neighbor_vids = Vec::with_capacity(edges.len());
72        let mut edge_ids = Vec::with_capacity(edges.len());
73
74        let mut current_src = DenseIdx::new(0);
75        let mut current_offset = 0u32;
76
77        for (src_dense, dst_vid, eid) in edges {
78            // Fill gaps in offsets
79            while current_src < src_dense {
80                offsets[current_src.as_usize() + 1] = current_offset;
81                current_src = DenseIdx::new(current_src.as_u32() + 1);
82            }
83
84            let dst_dense = remapper.to_dense(dst_vid).unwrap();
85            neighbors.push(dst_dense);
86            neighbor_vids.push(dst_vid);
87            edge_ids.push(eid);
88            current_offset += 1;
89        }
90
91        // Fill remaining offsets
92        while current_src.as_usize() < max_dense {
93            offsets[current_src.as_usize() + 1] = current_offset;
94            current_src = DenseIdx::new(current_src.as_u32() + 1);
95        }
96
97        Self {
98            offsets,
99            neighbors,
100            neighbor_vids,
101            edge_ids,
102        }
103    }
104
105    /// Creates a CSR from pre-sorted edges with src as u64 offset.
106    ///
107    /// Entries are (src_offset, neighbor_vid, eid).
108    pub fn new(max_vid_offset: usize, entries: Vec<(u64, Vid, Eid)>) -> Self {
109        if entries.is_empty() {
110            return Self {
111                offsets: vec![0],
112                neighbors: Vec::new(),
113                neighbor_vids: Vec::new(),
114                edge_ids: Vec::new(),
115            };
116        }
117
118        // Sort by src_offset
119        let mut sorted = entries;
120        sorted.sort_by_key(|(src, _, _)| *src);
121
122        let mut offsets = vec![0u32; max_vid_offset + 2];
123        let mut neighbors = Vec::with_capacity(sorted.len());
124        let mut neighbor_vids = Vec::with_capacity(sorted.len());
125        let mut edge_ids = Vec::with_capacity(sorted.len());
126
127        let mut current_offset = 0u32;
128        let mut last_src = 0usize;
129
130        for (src, neighbor, eid) in sorted {
131            let src_idx = src as usize;
132
133            // Fill gaps
134            if src_idx > last_src {
135                for offset in offsets.iter_mut().take(src_idx + 1).skip(last_src + 1) {
136                    *offset = current_offset;
137                }
138            }
139            last_src = src_idx;
140
141            // Store neighbor as DenseIdx (using VID's raw value as dense index)
142            neighbors.push(DenseIdx::new(neighbor.as_u64() as u32));
143            neighbor_vids.push(neighbor);
144            edge_ids.push(eid);
145            current_offset += 1;
146        }
147
148        // Fill remaining offsets
149        for offset in offsets.iter_mut().skip(last_src + 1) {
150            *offset = current_offset;
151        }
152
153        Self {
154            offsets,
155            neighbors,
156            neighbor_vids,
157            edge_ids,
158        }
159    }
160
161    /// O(1) neighbor lookup using DenseIdx.
162    ///
163    /// Returns slices of (neighbor dense indices, neighbor VIDs, edge IDs).
164    pub fn get_neighbors_dense(&self, idx: DenseIdx) -> (&[DenseIdx], &[Vid], &[Eid]) {
165        let i = idx.as_usize();
166        if i + 1 >= self.offsets.len() {
167            return (&[], &[], &[]);
168        }
169
170        let start = self.offsets[i] as usize;
171        let end = self.offsets[i + 1] as usize;
172
173        if start >= self.neighbors.len() || end > self.neighbors.len() {
174            return (&[], &[], &[]);
175        }
176
177        (
178            &self.neighbors[start..end],
179            &self.neighbor_vids[start..end],
180            &self.edge_ids[start..end],
181        )
182    }
183
184    /// O(1) neighbor lookup using Vid directly.
185    ///
186    /// Looks up using VID's raw value as offset.
187    /// Returns slices of (neighbor VIDs, edge IDs).
188    pub fn get_neighbors(&self, vid: Vid) -> (&[Vid], &[Eid]) {
189        // Use the VID's raw value as the index
190        let local = vid.as_u64() as usize;
191        if local + 1 >= self.offsets.len() {
192            return (&[], &[]);
193        }
194
195        let start = self.offsets[local] as usize;
196        let end = self.offsets[local + 1] as usize;
197
198        if start >= self.neighbor_vids.len() || end > self.neighbor_vids.len() {
199            return (&[], &[]);
200        }
201
202        (&self.neighbor_vids[start..end], &self.edge_ids[start..end])
203    }
204
205    /// Approximate memory usage in bytes.
206    pub fn memory_usage(&self) -> usize {
207        self.offsets.len() * 4
208            + self.neighbors.len() * 4
209            + self.neighbor_vids.len() * 8
210            + self.edge_ids.len() * 8
211    }
212
213    /// Returns the number of vertices (rows) in the CSR.
214    pub fn num_vertices(&self) -> usize {
215        if self.offsets.is_empty() {
216            0
217        } else {
218            self.offsets.len() - 1
219        }
220    }
221
222    /// Returns the number of edges in the CSR.
223    pub fn num_edges(&self) -> usize {
224        self.edge_ids.len()
225    }
226
227    /// Iterate over all edges in the CSR.
228    /// Returns iterator over (src_offset, dst_vid, eid).
229    pub fn iter_all(&self) -> impl Iterator<Item = (u64, Vid, Eid)> + '_ {
230        (0..self.offsets.len().saturating_sub(1)).flat_map(move |i| {
231            let start = self.offsets[i] as usize;
232            let end = self.offsets[i + 1] as usize;
233            (start..end).map(move |j| (i as u64, self.neighbor_vids[j], self.edge_ids[j]))
234        })
235    }
236}
237
238#[cfg(test)]
239mod tests {
240    use super::*;
241
242    #[test]
243    fn test_csr_from_edges() {
244        let mut remapper = VidRemapper::new();
245
246        let edges = vec![
247            (Vid::new(100), Vid::new(200), Eid::new(1)),
248            (Vid::new(100), Vid::new(300), Eid::new(2)),
249            (Vid::new(200), Vid::new(300), Eid::new(3)),
250        ];
251
252        let csr = CompressedSparseRow::from_edges(edges, &mut remapper);
253
254        // Check remapper has all VIDs
255        assert_eq!(remapper.len(), 3);
256        assert!(remapper.contains(Vid::new(100)));
257        assert!(remapper.contains(Vid::new(200)));
258        assert!(remapper.contains(Vid::new(300)));
259
260        // Check neighbors for vid=100
261        let idx100 = remapper.to_dense(Vid::new(100)).unwrap();
262        let (_, vids, eids) = csr.get_neighbors_dense(idx100);
263        assert_eq!(vids.len(), 2);
264        assert_eq!(eids.len(), 2);
265    }
266
267    #[test]
268    fn test_csr_empty() {
269        let mut remapper = VidRemapper::new();
270        let csr = CompressedSparseRow::from_edges(vec![], &mut remapper);
271        assert_eq!(csr.num_edges(), 0);
272    }
273}
274
275/// Entry in a versioned CSR that tracks when each edge was created.
276#[derive(Debug, Clone, Copy)]
277pub struct CsrEdgeEntry {
278    /// Neighbor vertex ID.
279    pub neighbor_vid: Vid,
280    /// Edge ID.
281    pub eid: Eid,
282    /// Version at which this edge was created.
283    pub created_version: u64,
284}
285
286/// Versioned CSR for the dual-CSR adjacency architecture.
287///
288/// Stores adjacency with per-edge version metadata, enabling
289/// snapshot queries that filter by version without rebuilding.
290/// Uses VID raw values as offsets (same as [`CompressedSparseRow`]).
291#[derive(Clone)]
292pub struct MainCsr {
293    /// Offset into entries for vertex i: offsets[i]..offsets[i+1]
294    offsets: Vec<u32>,
295    /// Flattened entries with version metadata.
296    entries: Vec<CsrEdgeEntry>,
297}
298
299impl MainCsr {
300    /// Creates a MainCsr from versioned edge entries.
301    ///
302    /// # Arguments
303    /// * `max_vid_offset` - Maximum VID offset value
304    /// * `entries` - (src_offset, neighbor_vid, eid, created_version) tuples
305    pub fn from_edge_entries(max_vid_offset: usize, mut raw: Vec<(u64, Vid, Eid, u64)>) -> Self {
306        if raw.is_empty() {
307            return Self {
308                offsets: vec![0],
309                entries: Vec::new(),
310            };
311        }
312
313        raw.sort_by_key(|(src, _, _, _)| *src);
314
315        let mut offsets = vec![0u32; max_vid_offset + 2];
316        let mut entries = Vec::with_capacity(raw.len());
317
318        let mut current_offset = 0u32;
319        let mut last_src = 0usize;
320
321        for (src, neighbor_vid, eid, created_version) in raw {
322            let src_idx = src as usize;
323
324            if src_idx > last_src {
325                for offset in offsets.iter_mut().take(src_idx + 1).skip(last_src + 1) {
326                    *offset = current_offset;
327                }
328            }
329            last_src = src_idx;
330
331            entries.push(CsrEdgeEntry {
332                neighbor_vid,
333                eid,
334                created_version,
335            });
336            current_offset += 1;
337        }
338
339        for offset in offsets.iter_mut().skip(last_src + 1) {
340            *offset = current_offset;
341        }
342
343        Self { offsets, entries }
344    }
345
346    /// O(1) versioned entry lookup by VID.
347    pub fn get_entries(&self, vid: Vid) -> &[CsrEdgeEntry] {
348        let local = vid.as_u64() as usize;
349        if local + 1 >= self.offsets.len() {
350            return &[];
351        }
352        let start = self.offsets[local] as usize;
353        let end = self.offsets[local + 1] as usize;
354        if start >= self.entries.len() || end > self.entries.len() {
355            return &[];
356        }
357        &self.entries[start..end]
358    }
359
360    /// O(1) neighbor lookup (ignores version).
361    pub fn get_neighbors_unversioned(&self, vid: Vid) -> (Vec<Vid>, Vec<Eid>) {
362        let entries = self.get_entries(vid);
363        let vids: Vec<Vid> = entries.iter().map(|e| e.neighbor_vid).collect();
364        let eids: Vec<Eid> = entries.iter().map(|e| e.eid).collect();
365        (vids, eids)
366    }
367
368    /// Approximate memory usage in bytes.
369    pub fn memory_usage(&self) -> usize {
370        self.offsets.len() * 4 + self.entries.len() * std::mem::size_of::<CsrEdgeEntry>()
371    }
372
373    /// Returns the number of edges.
374    pub fn num_edges(&self) -> usize {
375        self.entries.len()
376    }
377
378    /// Returns the number of vertices (rows) in the CSR.
379    pub fn num_vertices(&self) -> usize {
380        if self.offsets.is_empty() {
381            0
382        } else {
383            self.offsets.len() - 1
384        }
385    }
386}
387
388#[cfg(test)]
389mod main_csr_tests {
390    use super::*;
391
392    #[test]
393    fn test_main_csr_basic() {
394        let entries = vec![
395            (0u64, Vid::new(1), Eid::new(100), 1u64),
396            (0u64, Vid::new(2), Eid::new(101), 2u64),
397            (1u64, Vid::new(2), Eid::new(102), 1u64),
398        ];
399
400        let csr = MainCsr::from_edge_entries(2, entries);
401
402        let e0 = csr.get_entries(Vid::new(0));
403        assert_eq!(e0.len(), 2);
404        assert_eq!(e0[0].neighbor_vid, Vid::new(1));
405        assert_eq!(e0[0].created_version, 1);
406        assert_eq!(e0[1].neighbor_vid, Vid::new(2));
407
408        let e1 = csr.get_entries(Vid::new(1));
409        assert_eq!(e1.len(), 1);
410        assert_eq!(e1[0].eid, Eid::new(102));
411    }
412
413    #[test]
414    fn test_main_csr_empty() {
415        let csr = MainCsr::from_edge_entries(0, vec![]);
416        assert_eq!(csr.num_edges(), 0);
417        assert_eq!(csr.get_entries(Vid::new(0)).len(), 0);
418    }
419
420    #[test]
421    fn test_main_csr_get_neighbors() {
422        let entries = vec![
423            (0u64, Vid::new(10), Eid::new(100), 1u64),
424            (0u64, Vid::new(20), Eid::new(101), 2u64),
425        ];
426        let csr = MainCsr::from_edge_entries(0, entries);
427        let (vids, eids) = csr.get_neighbors_unversioned(Vid::new(0));
428        assert_eq!(vids, vec![Vid::new(10), Vid::new(20)]);
429        assert_eq!(eids, vec![Eid::new(100), Eid::new(101)]);
430    }
431}