Skip to main content

uni_store/storage/
csr.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4//! Compressed Sparse Row (CSR) representation of adjacency.
5//!
6//! In the new storage model, CSR uses DenseIdx for internal indexing.
7//! VidRemapper handles the mapping between sparse VIDs and dense indices.
8
9use crate::runtime::VidRemapper;
10use uni_common::core::id::{DenseIdx, Eid, Vid};
11
12/// Compressed Sparse Row (CSR) representation of adjacency
13///
14/// Optimized for:
15/// 1. Low memory footprint (offsets are u32)
16/// 2. O(1) neighbor lookup using DenseIdx
17/// 3. CPU cache locality (contiguous memory)
18///
19/// Note: CSR uses DenseIdx internally. Use VidRemapper to convert Vid ↔ DenseIdx.
20#[derive(Clone)]
21pub struct CompressedSparseRow {
22    /// Offset into neighbors for vertex i: offsets[i]..offsets[i+1]
23    /// Index by DenseIdx
24    offsets: Vec<u32>,
25
26    /// Flattened neighbor list (stored as DenseIdx for internal operations)
27    neighbors: Vec<DenseIdx>,
28
29    /// Neighbor VIDs (for when caller needs actual VIDs)
30    neighbor_vids: Vec<Vid>,
31
32    /// Edge IDs parallel to neighbors
33    edge_ids: Vec<Eid>,
34}
35
36impl CompressedSparseRow {
37    /// Creates a new CSR from a list of edges.
38    ///
39    /// # Arguments
40    /// * `entries` - List of (src_vid, dst_vid, eid) tuples
41    /// * `remapper` - Mutable remapper that will be populated with all VIDs
42    ///
43    /// Returns the CSR with all vertices remapped to dense indices.
44    pub fn from_edges(entries: Vec<(Vid, Vid, Eid)>, remapper: &mut VidRemapper) -> Self {
45        if entries.is_empty() {
46            return Self {
47                offsets: vec![0],
48                neighbors: Vec::new(),
49                neighbor_vids: Vec::new(),
50                edge_ids: Vec::new(),
51            };
52        }
53
54        // First pass: insert all VIDs into remapper
55        for (src, dst, _) in &entries {
56            remapper.insert(*src);
57            remapper.insert(*dst);
58        }
59
60        // Convert to (dense_src, dst_vid, eid) and sort by src
61        let mut edges: Vec<(DenseIdx, Vid, Eid)> = entries
62            .iter()
63            .map(|(src, dst, eid)| (remapper.to_dense(*src).unwrap(), *dst, *eid))
64            .collect();
65
66        edges.sort_by_key(|(src, _, _)| *src);
67
68        let max_dense = remapper.len();
69        let mut offsets = vec![0u32; max_dense + 1];
70        let mut neighbors = Vec::with_capacity(edges.len());
71        let mut neighbor_vids = Vec::with_capacity(edges.len());
72        let mut edge_ids = Vec::with_capacity(edges.len());
73
74        let mut current_src = DenseIdx::new(0);
75        let mut current_offset = 0u32;
76
77        for (src_dense, dst_vid, eid) in edges {
78            // Fill gaps in offsets
79            while current_src < src_dense {
80                offsets[current_src.as_usize() + 1] = current_offset;
81                current_src = DenseIdx::new(current_src.as_u32() + 1);
82            }
83
84            let dst_dense = remapper.to_dense(dst_vid).unwrap();
85            neighbors.push(dst_dense);
86            neighbor_vids.push(dst_vid);
87            edge_ids.push(eid);
88            current_offset += 1;
89        }
90
91        // Fill remaining offsets
92        while current_src.as_usize() < max_dense {
93            offsets[current_src.as_usize() + 1] = current_offset;
94            current_src = DenseIdx::new(current_src.as_u32() + 1);
95        }
96
97        Self {
98            offsets,
99            neighbors,
100            neighbor_vids,
101            edge_ids,
102        }
103    }
104
105    /// Creates a CSR from pre-sorted edges with src as u64 offset.
106    ///
107    /// Entries are (src_offset, neighbor_vid, eid).
108    pub fn new(max_vid_offset: usize, entries: Vec<(u64, Vid, Eid)>) -> Self {
109        if entries.is_empty() {
110            return Self {
111                offsets: vec![0],
112                neighbors: Vec::new(),
113                neighbor_vids: Vec::new(),
114                edge_ids: Vec::new(),
115            };
116        }
117
118        // Sort by src_offset
119        let mut sorted = entries;
120        sorted.sort_by_key(|(src, _, _)| *src);
121
122        let mut offsets = vec![0u32; max_vid_offset + 2];
123        let mut neighbors = Vec::with_capacity(sorted.len());
124        let mut neighbor_vids = Vec::with_capacity(sorted.len());
125        let mut edge_ids = Vec::with_capacity(sorted.len());
126
127        let mut current_offset = 0u32;
128        let mut last_src = 0usize;
129
130        for (src, neighbor, eid) in sorted {
131            let src_idx = src as usize;
132
133            // Fill gaps
134            if src_idx > last_src {
135                for offset in offsets.iter_mut().take(src_idx + 1).skip(last_src + 1) {
136                    *offset = current_offset;
137                }
138            }
139            last_src = src_idx;
140
141            // Store neighbor as DenseIdx (using VID's raw value as dense index).
142            // This legacy dense index is a u32; a VID >= 2^32 cannot be
143            // represented here. The full-width VID is preserved in
144            // `neighbor_vids` (returned by `get_neighbors`), so rather than
145            // silently wrapping the raw value (corrupting the dense slice), we
146            // enforce the domain with a loud, documented failure. (review H13)
147            let dense = u32::try_from(neighbor.as_u64()).expect(
148                "CompressedSparseRow dense index is u32; VID exceeds u32 range \
149                 (use MainCsr/VidRemapper for full-width VIDs)",
150            );
151            neighbors.push(DenseIdx::new(dense));
152            neighbor_vids.push(neighbor);
153            edge_ids.push(eid);
154            current_offset += 1;
155        }
156
157        // Fill remaining offsets
158        for offset in offsets.iter_mut().skip(last_src + 1) {
159            *offset = current_offset;
160        }
161
162        Self {
163            offsets,
164            neighbors,
165            neighbor_vids,
166            edge_ids,
167        }
168    }
169
170    /// O(1) neighbor lookup using DenseIdx.
171    ///
172    /// Returns slices of (neighbor dense indices, neighbor VIDs, edge IDs).
173    pub fn get_neighbors_dense(&self, idx: DenseIdx) -> (&[DenseIdx], &[Vid], &[Eid]) {
174        let i = idx.as_usize();
175        if i + 1 >= self.offsets.len() {
176            return (&[], &[], &[]);
177        }
178
179        let start = self.offsets[i] as usize;
180        let end = self.offsets[i + 1] as usize;
181
182        if start >= self.neighbors.len() || end > self.neighbors.len() {
183            return (&[], &[], &[]);
184        }
185
186        (
187            &self.neighbors[start..end],
188            &self.neighbor_vids[start..end],
189            &self.edge_ids[start..end],
190        )
191    }
192
193    /// O(1) neighbor lookup using Vid directly.
194    ///
195    /// Looks up using VID's raw value as offset.
196    /// Returns slices of (neighbor VIDs, edge IDs).
197    pub fn get_neighbors(&self, vid: Vid) -> (&[Vid], &[Eid]) {
198        // Use the VID's raw value as the index
199        let local = vid.as_u64() as usize;
200        if local + 1 >= self.offsets.len() {
201            return (&[], &[]);
202        }
203
204        let start = self.offsets[local] as usize;
205        let end = self.offsets[local + 1] as usize;
206
207        if start >= self.neighbor_vids.len() || end > self.neighbor_vids.len() {
208            return (&[], &[]);
209        }
210
211        (&self.neighbor_vids[start..end], &self.edge_ids[start..end])
212    }
213
214    /// Approximate memory usage in bytes.
215    pub fn memory_usage(&self) -> usize {
216        self.offsets.len() * 4
217            + self.neighbors.len() * 4
218            + self.neighbor_vids.len() * 8
219            + self.edge_ids.len() * 8
220    }
221
222    /// Returns the number of vertices (rows) in the CSR.
223    pub fn num_vertices(&self) -> usize {
224        if self.offsets.is_empty() {
225            0
226        } else {
227            self.offsets.len() - 1
228        }
229    }
230
231    /// Returns the number of edges in the CSR.
232    pub fn num_edges(&self) -> usize {
233        self.edge_ids.len()
234    }
235
236    /// Iterate over all edges in the CSR.
237    /// Returns iterator over (src_offset, dst_vid, eid).
238    pub fn iter_all(&self) -> impl Iterator<Item = (u64, Vid, Eid)> + '_ {
239        (0..self.offsets.len().saturating_sub(1)).flat_map(move |i| {
240            let start = self.offsets[i] as usize;
241            let end = self.offsets[i + 1] as usize;
242            (start..end).map(move |j| (i as u64, self.neighbor_vids[j], self.edge_ids[j]))
243        })
244    }
245}
246
247#[cfg(test)]
248mod tests {
249    use super::*;
250
251    #[test]
252    fn test_csr_from_edges() {
253        let mut remapper = VidRemapper::new();
254
255        let edges = vec![
256            (Vid::new(100), Vid::new(200), Eid::new(1)),
257            (Vid::new(100), Vid::new(300), Eid::new(2)),
258            (Vid::new(200), Vid::new(300), Eid::new(3)),
259        ];
260
261        let csr = CompressedSparseRow::from_edges(edges, &mut remapper);
262
263        // Check remapper has all VIDs
264        assert_eq!(remapper.len(), 3);
265        assert!(remapper.contains(Vid::new(100)));
266        assert!(remapper.contains(Vid::new(200)));
267        assert!(remapper.contains(Vid::new(300)));
268
269        // Check neighbors for vid=100
270        let idx100 = remapper.to_dense(Vid::new(100)).unwrap();
271        let (_, vids, eids) = csr.get_neighbors_dense(idx100);
272        assert_eq!(vids.len(), 2);
273        assert_eq!(eids.len(), 2);
274    }
275
276    #[test]
277    fn test_csr_empty() {
278        let mut remapper = VidRemapper::new();
279        let csr = CompressedSparseRow::from_edges(vec![], &mut remapper);
280        assert_eq!(csr.num_edges(), 0);
281    }
282}
283
284/// Entry in a versioned CSR that tracks when each edge was created.
285#[derive(Debug, Clone, Copy)]
286pub struct CsrEdgeEntry {
287    /// Neighbor vertex ID.
288    pub neighbor_vid: Vid,
289    /// Edge ID.
290    pub eid: Eid,
291    /// Version at which this edge was created.
292    pub created_version: u64,
293}
294
295/// Versioned CSR for the dual-CSR adjacency architecture.
296///
297/// Stores adjacency with per-edge version metadata, enabling
298/// snapshot queries that filter by version without rebuilding.
299/// Uses VID raw values as offsets (same as [`CompressedSparseRow`]).
300#[derive(Clone)]
301pub struct MainCsr {
302    /// Offset into entries for vertex i: offsets[i]..offsets[i+1]
303    offsets: Vec<u32>,
304    /// Flattened entries with version metadata.
305    entries: Vec<CsrEdgeEntry>,
306}
307
308impl MainCsr {
309    /// Creates a MainCsr from versioned edge entries.
310    ///
311    /// # Arguments
312    /// * `max_vid_offset` - Maximum VID offset value
313    /// * `entries` - (src_offset, neighbor_vid, eid, created_version) tuples
314    pub fn from_edge_entries(max_vid_offset: usize, mut raw: Vec<(u64, Vid, Eid, u64)>) -> Self {
315        if raw.is_empty() {
316            return Self {
317                offsets: vec![0],
318                entries: Vec::new(),
319            };
320        }
321
322        raw.sort_by_key(|(src, _, _, _)| *src);
323
324        let mut offsets = vec![0u32; max_vid_offset + 2];
325        let mut entries = Vec::with_capacity(raw.len());
326
327        let mut current_offset = 0u32;
328        let mut last_src = 0usize;
329
330        for (src, neighbor_vid, eid, created_version) in raw {
331            let src_idx = src as usize;
332
333            if src_idx > last_src {
334                for offset in offsets.iter_mut().take(src_idx + 1).skip(last_src + 1) {
335                    *offset = current_offset;
336                }
337            }
338            last_src = src_idx;
339
340            entries.push(CsrEdgeEntry {
341                neighbor_vid,
342                eid,
343                created_version,
344            });
345            current_offset += 1;
346        }
347
348        for offset in offsets.iter_mut().skip(last_src + 1) {
349            *offset = current_offset;
350        }
351
352        Self { offsets, entries }
353    }
354
355    /// O(1) versioned entry lookup by VID.
356    pub fn get_entries(&self, vid: Vid) -> &[CsrEdgeEntry] {
357        let local = vid.as_u64() as usize;
358        if local + 1 >= self.offsets.len() {
359            return &[];
360        }
361        let start = self.offsets[local] as usize;
362        let end = self.offsets[local + 1] as usize;
363        if start >= self.entries.len() || end > self.entries.len() {
364            return &[];
365        }
366        &self.entries[start..end]
367    }
368
369    /// O(1) neighbor lookup (ignores version).
370    pub fn get_neighbors_unversioned(&self, vid: Vid) -> (Vec<Vid>, Vec<Eid>) {
371        let entries = self.get_entries(vid);
372        let vids: Vec<Vid> = entries.iter().map(|e| e.neighbor_vid).collect();
373        let eids: Vec<Eid> = entries.iter().map(|e| e.eid).collect();
374        (vids, eids)
375    }
376
377    /// Approximate memory usage in bytes.
378    pub fn memory_usage(&self) -> usize {
379        self.offsets.len() * 4 + self.entries.len() * std::mem::size_of::<CsrEdgeEntry>()
380    }
381
382    /// Returns the number of edges.
383    pub fn num_edges(&self) -> usize {
384        self.entries.len()
385    }
386
387    /// Returns the number of vertices (rows) in the CSR.
388    pub fn num_vertices(&self) -> usize {
389        if self.offsets.is_empty() {
390            0
391        } else {
392            self.offsets.len() - 1
393        }
394    }
395}
396
397#[cfg(test)]
398mod main_csr_tests {
399    use super::*;
400
401    #[test]
402    fn test_main_csr_basic() {
403        let entries = vec![
404            (0u64, Vid::new(1), Eid::new(100), 1u64),
405            (0u64, Vid::new(2), Eid::new(101), 2u64),
406            (1u64, Vid::new(2), Eid::new(102), 1u64),
407        ];
408
409        let csr = MainCsr::from_edge_entries(2, entries);
410
411        let e0 = csr.get_entries(Vid::new(0));
412        assert_eq!(e0.len(), 2);
413        assert_eq!(e0[0].neighbor_vid, Vid::new(1));
414        assert_eq!(e0[0].created_version, 1);
415        assert_eq!(e0[1].neighbor_vid, Vid::new(2));
416
417        let e1 = csr.get_entries(Vid::new(1));
418        assert_eq!(e1.len(), 1);
419        assert_eq!(e1[0].eid, Eid::new(102));
420    }
421
422    #[test]
423    fn test_main_csr_empty() {
424        let csr = MainCsr::from_edge_entries(0, vec![]);
425        assert_eq!(csr.num_edges(), 0);
426        assert_eq!(csr.get_entries(Vid::new(0)).len(), 0);
427    }
428
429    #[test]
430    fn test_main_csr_get_neighbors() {
431        let entries = vec![
432            (0u64, Vid::new(10), Eid::new(100), 1u64),
433            (0u64, Vid::new(20), Eid::new(101), 2u64),
434        ];
435        let csr = MainCsr::from_edge_entries(0, entries);
436        let (vids, eids) = csr.get_neighbors_unversioned(Vid::new(0));
437        assert_eq!(vids, vec![Vid::new(10), Vid::new(20)]);
438        assert_eq!(eids, vec![Eid::new(100), Eid::new(101)]);
439    }
440
441    /// H13: a neighbor VID beyond u32 range must NOT be silently truncated into
442    /// the legacy dense-index slice — it fails loudly instead.
443    #[test]
444    #[should_panic(expected = "VID exceeds u32 range")]
445    fn test_csr_new_rejects_vid_beyond_u32() {
446        let big = Vid::new(1u64 << 33); // > u32::MAX
447        let _ = CompressedSparseRow::new(0, vec![(0u64, big, Eid::new(1))]);
448    }
449}