formualizer_eval/engine/
csr_edges.rs

1use super::packed_coord::PackedCoord;
2use super::vertex::VertexId;
3
4#[cfg(test)]
5mod tests {
6    use super::*;
7
8    #[test]
9    fn test_csr_construction() {
10        let edges = vec![
11            (0u32, vec![1u32, 2u32]),
12            (1u32, vec![2u32, 3u32]),
13            (2u32, vec![3u32]),
14            (3u32, vec![]),
15        ];
16
17        let coords = vec![
18            PackedCoord::new(0, 0),
19            PackedCoord::new(0, 1),
20            PackedCoord::new(1, 0),
21            PackedCoord::new(1, 1),
22        ];
23
24        let csr = CsrEdges::from_adjacency(edges, &coords);
25
26        assert_eq!(csr.out_edges(VertexId(0)), &[VertexId(1), VertexId(2)]);
27        assert_eq!(csr.out_edges(VertexId(1)), &[VertexId(2), VertexId(3)]);
28        assert_eq!(csr.out_edges(VertexId(3)), &[]);
29    }
30
31    #[test]
32    fn test_csr_memory_efficiency() {
33        // 10K vertices, average 4 edges each
34        let mut edges = Vec::new();
35        let mut coords = Vec::new();
36
37        for i in 0..10_000u32 {
38            let targets: Vec<_> = (0..4).map(|j| ((i + j + 1) % 10_000)).collect();
39            edges.push((i, targets));
40            coords.push(PackedCoord::new(i, i));
41        }
42
43        let csr = CsrEdges::from_adjacency(edges, &coords);
44
45        // Should use ~200KB (40k edges × 4B + 10k vertices × 4B)
46        assert!(csr.memory_usage() < 410_000, "{}", csr.memory_usage());
47    }
48
49    #[test]
50    fn test_csr_edge_ordering() {
51        // Test that edges are sorted by (row, col, id) for determinism
52        let edges = vec![
53            (0u32, vec![3u32, 1u32, 2u32]), // Unsorted input
54        ];
55
56        let coords = vec![
57            PackedCoord::new(0, 0), // vertex 0
58            PackedCoord::new(0, 5), // vertex 1
59            PackedCoord::new(0, 3), // vertex 2
60            PackedCoord::new(1, 0), // vertex 3
61        ];
62
63        let csr = CsrEdges::from_adjacency(edges, &coords);
64
65        // Should be sorted by row first, then col: [1(0,5), 2(0,3), 3(1,0)]
66        // But row 0 comes before row 1, so order is: 2(0,3), 1(0,5), 3(1,0)
67        assert_eq!(
68            csr.out_edges(VertexId(0)),
69            &[VertexId(2), VertexId(1), VertexId(3)]
70        );
71    }
72
73    #[test]
74    fn test_csr_empty_graph() {
75        let edges: Vec<(u32, Vec<u32>)> = vec![];
76        let coords: Vec<PackedCoord> = vec![];
77
78        let csr = CsrEdges::from_adjacency(edges, &coords);
79
80        assert_eq!(csr.num_vertices(), 0);
81        assert_eq!(csr.num_edges(), 0);
82        // Empty graph has one offset entry (0) = 4 bytes
83        assert_eq!(csr.memory_usage(), 8);
84    }
85
86    #[test]
87    fn test_csr_single_vertex() {
88        let edges = vec![(0u32, vec![])];
89        let coords = vec![PackedCoord::new(0, 0)];
90
91        let csr = CsrEdges::from_adjacency(edges, &coords);
92
93        assert_eq!(csr.num_vertices(), 1);
94        assert_eq!(csr.num_edges(), 0);
95        assert_eq!(csr.out_edges(VertexId(0)), &[]);
96    }
97
98    #[test]
99    fn test_csr_self_loop() {
100        let edges = vec![(0u32, vec![0u32])]; // Self loop
101        let coords = vec![PackedCoord::new(0, 0)];
102
103        let csr = CsrEdges::from_adjacency(edges, &coords);
104
105        assert_eq!(csr.out_edges(VertexId(0)), &[VertexId(0)]);
106        assert_eq!(csr.num_edges(), 1);
107    }
108
109    #[test]
110    fn test_csr_duplicate_edges() {
111        // CSR should preserve duplicates (formulas can reference same cell multiple times)
112        let edges = vec![(0u32, vec![1u32, 1u32, 2u32, 1u32])];
113        let coords = vec![
114            PackedCoord::new(0, 0),
115            PackedCoord::new(0, 1),
116            PackedCoord::new(0, 2),
117        ];
118
119        let csr = CsrEdges::from_adjacency(edges, &coords);
120
121        // Should preserve all edges, sorted by target coords
122        assert_eq!(
123            csr.out_edges(VertexId(0)),
124            &[VertexId(1), VertexId(1), VertexId(1), VertexId(2)]
125        );
126    }
127
128    #[test]
129    fn test_degree_calculation() {
130        let edges = vec![
131            (0u32, vec![1u32, 2u32, 3u32]),
132            (1u32, vec![2u32]),
133            (2u32, vec![]),
134            (3u32, vec![0u32, 1u32]),
135        ];
136
137        let coords = vec![
138            PackedCoord::new(0, 0),
139            PackedCoord::new(0, 1),
140            PackedCoord::new(1, 0),
141            PackedCoord::new(1, 1),
142        ];
143
144        let csr = CsrEdges::from_adjacency(edges, &coords);
145
146        assert_eq!(csr.out_degree(VertexId(0)), 3);
147        assert_eq!(csr.out_degree(VertexId(1)), 1);
148        assert_eq!(csr.out_degree(VertexId(2)), 0);
149        assert_eq!(csr.out_degree(VertexId(3)), 2);
150    }
151
152    #[test]
153    fn test_out_of_bounds_access() {
154        let edges = vec![(0u32, vec![])];
155        let coords = vec![PackedCoord::new(0, 0)];
156
157        let csr = CsrEdges::from_adjacency(edges, &coords);
158
159        // Should return empty slice - only vertex 0 exists
160        assert_eq!(csr.out_edges(VertexId(1)), &[]);
161    }
162
163    #[test]
164    fn test_csr_iterator() {
165        let edges = vec![
166            (0u32, vec![1u32, 2u32]),
167            (1u32, vec![3u32]),
168            (2u32, vec![1u32, 3u32]),
169            (3u32, vec![]),
170        ];
171
172        let coords = vec![
173            PackedCoord::new(0, 0),
174            PackedCoord::new(0, 1),
175            PackedCoord::new(1, 0),
176            PackedCoord::new(1, 1),
177        ];
178
179        let csr = CsrEdges::from_adjacency(edges, &coords);
180
181        let collected: Vec<_> = csr.iter().collect();
182        assert_eq!(collected.len(), 4);
183        assert_eq!(collected[0].0, VertexId(0));
184        assert_eq!(collected[0].1, &[VertexId(1), VertexId(2)]);
185        assert_eq!(collected[3].1, &[]);
186    }
187
188    #[test]
189    fn test_has_edge() {
190        let edges = vec![
191            (0u32, vec![1u32, 2u32]),
192            (1u32, vec![3u32]),
193            (2u32, vec![]),
194            (3u32, vec![0u32]), // Back edge
195        ];
196
197        let coords = vec![
198            PackedCoord::new(0, 0),
199            PackedCoord::new(0, 1),
200            PackedCoord::new(1, 0),
201            PackedCoord::new(1, 1),
202        ];
203
204        let csr = CsrEdges::from_adjacency(edges, &coords);
205
206        assert!(csr.has_edge(VertexId(0), VertexId(1)));
207        assert!(csr.has_edge(VertexId(0), VertexId(2)));
208        assert!(!csr.has_edge(VertexId(0), VertexId(3)));
209        assert!(csr.has_edge(VertexId(3), VertexId(0))); // Back edge exists
210        assert!(!csr.has_edge(VertexId(2), VertexId(0))); // No edge
211    }
212
213    #[test]
214    fn test_csr_with_offset_vertex_ids() {
215        // Test CSR with vertex IDs starting at 1024 (FIRST_NORMAL_VERTEX)
216        let base_id = 1024u32;
217        let edges = vec![
218            (base_id, vec![base_id + 1, base_id + 2]),
219            (base_id + 1, vec![base_id + 3]),
220            (base_id + 2, vec![base_id + 3]),
221            (base_id + 3, vec![]),
222        ];
223
224        let coords = vec![
225            PackedCoord::new(0, 0),
226            PackedCoord::new(0, 1),
227            PackedCoord::new(1, 0),
228            PackedCoord::new(1, 1),
229        ];
230
231        let csr = CsrEdges::from_adjacency(edges, &coords);
232
233        // Verify min vertex ID
234        assert_eq!(csr.min_vertex_id, base_id);
235
236        // Verify edges work with offset IDs
237        assert_eq!(
238            csr.out_edges(VertexId(base_id)),
239            &[VertexId(base_id + 1), VertexId(base_id + 2)]
240        );
241        assert_eq!(
242            csr.out_edges(VertexId(base_id + 1)),
243            &[VertexId(base_id + 3)]
244        );
245        assert_eq!(
246            csr.out_edges(VertexId(base_id + 2)),
247            &[VertexId(base_id + 3)]
248        );
249        assert_eq!(csr.out_edges(VertexId(base_id + 3)), &[]);
250
251        // Verify out of bounds returns empty
252        assert_eq!(csr.out_edges(VertexId(0)), &[]); // Before min
253        assert_eq!(csr.out_edges(VertexId(base_id + 100)), &[]); // After max
254    }
255
256    #[test]
257    fn test_csr_with_sparse_vertex_ids() {
258        // Test CSR with sparse vertex IDs (gaps in numbering)
259        let edges = vec![
260            (100u32, vec![300u32, 500u32]),
261            (300u32, vec![500u32]),
262            (500u32, vec![100u32]), // Back edge
263        ];
264
265        let coords = vec![
266            PackedCoord::new(0, 0), // For vertex 100 (index 0)
267            PackedCoord::new(0, 0), // Padding (index 100-199)
268            PackedCoord::new(1, 0), // For vertex 300 (index 200)
269            PackedCoord::new(0, 0), // Padding (index 300-399)
270            PackedCoord::new(2, 0), // For vertex 500 (index 400)
271        ];
272
273        let csr = CsrEdges::from_adjacency(edges, &coords);
274
275        // Verify min vertex ID
276        assert_eq!(csr.min_vertex_id, 100);
277
278        // Verify edges work
279        assert_eq!(
280            csr.out_edges(VertexId(100)),
281            &[VertexId(300), VertexId(500)]
282        );
283        assert_eq!(csr.out_edges(VertexId(300)), &[VertexId(500)]);
284        assert_eq!(csr.out_edges(VertexId(500)), &[VertexId(100)]);
285
286        // Non-existent vertices return empty
287        assert_eq!(csr.out_edges(VertexId(200)), &[]);
288        assert_eq!(csr.out_edges(VertexId(400)), &[]);
289    }
290}
291
292/// Compressed Sparse Row (CSR) format for edge storage
293///
294/// Replaces Vec<VertexId> per vertex with two arrays:
295/// - offsets: Start index for each vertex's edges
296/// - edges: All edges concatenated
297///
298/// Memory usage: O(V + E) instead of O(V * avg_degree * vec_overhead)
299#[derive(Debug, Clone)]
300pub struct CsrEdges {
301    /// Offsets into the edges array. Length = num_vertices + 1
302    /// offset[i] = start index of vertex i's edges
303    /// offset[i+1] - offset[i] = number of edges for vertex i
304    offsets: Vec<u32>,
305
306    /// All edges concatenated, sorted within each vertex's section
307    edges: Vec<VertexId>,
308
309    /// Reverse edges: offsets for incoming edges
310    reverse_offsets: Vec<u32>,
311
312    /// All incoming edges concatenated
313    reverse_edges: Vec<VertexId>,
314
315    /// Minimum vertex ID in the graph (for offset calculation)
316    min_vertex_id: u32,
317}
318
319impl CsrEdges {
320    /// Create CSR from adjacency list representation
321    ///
322    /// # Arguments
323    /// - adj: Vector of (vertex_id, outgoing_edges) where vertex_id is the actual VertexId value
324    /// - coords: Packed coordinates for each vertex (used for deterministic ordering)
325    ///
326    /// # Edge Ordering
327    /// Edges are sorted by (row, col, vertex_id) to ensure deterministic
328    /// evaluation order for formulas (important for functions with side effects)
329    pub fn from_adjacency(adj: Vec<(u32, Vec<u32>)>, coords: &[PackedCoord]) -> Self {
330        if adj.is_empty() {
331            return Self {
332                offsets: vec![0],
333                edges: Vec::new(),
334                reverse_offsets: vec![0],
335                reverse_edges: Vec::new(),
336                min_vertex_id: 0,
337            };
338        }
339
340        // Find min and max vertex IDs
341        let mut min_id = u32::MAX;
342        let mut max_id = 0;
343        for &(vid, ref targets) in &adj {
344            min_id = min_id.min(vid);
345            max_id = max_id.max(vid);
346            for &target in targets {
347                min_id = min_id.min(target);
348                max_id = max_id.max(target);
349            }
350        }
351
352        // If no vertices, return empty
353        if min_id == u32::MAX {
354            return Self {
355                offsets: vec![0],
356                edges: Vec::new(),
357                reverse_offsets: vec![0],
358                reverse_edges: Vec::new(),
359                min_vertex_id: 0,
360            };
361        }
362
363        let num_vertices = (max_id - min_id + 1) as usize;
364        let mut offsets = vec![0u32; num_vertices + 1];
365        let mut edges = Vec::new();
366
367        // Build adjacency data indexed by offset
368        let mut adj_by_offset: Vec<Vec<u32>> = vec![Vec::new(); num_vertices];
369        for (vid, targets) in adj {
370            let offset_idx = (vid - min_id) as usize;
371            adj_by_offset[offset_idx] = targets;
372        }
373
374        // Build forward edges
375        for (idx, mut targets) in adj_by_offset.clone().into_iter().enumerate() {
376            // Sort targets by their coordinates for deterministic ordering
377            targets.sort_by_key(|&t| {
378                // Convert vertex ID to index in coords array
379                let coord_idx = (t - min_id) as usize;
380                if coord_idx < coords.len() {
381                    let coord = coords[coord_idx];
382                    (coord.row(), coord.col(), t)
383                } else {
384                    // Handle out-of-bounds gracefully for construction
385                    (u32::MAX, u32::MAX, t)
386                }
387            });
388
389            edges.extend(targets.into_iter().map(VertexId));
390            offsets[idx + 1] = edges.len() as u32;
391        }
392
393        // Build reverse edges (incoming edges for each vertex)
394        let mut reverse_offsets = vec![0u32; num_vertices + 1];
395        let mut reverse_edges = Vec::new();
396        let mut reverse_adj: Vec<Vec<u32>> = vec![Vec::new(); num_vertices];
397
398        // Collect reverse edges
399        for (idx, targets) in adj_by_offset.into_iter().enumerate() {
400            let source = min_id + idx as u32;
401            for target in targets {
402                let target_idx = (target - min_id) as usize;
403                if target_idx < num_vertices {
404                    reverse_adj[target_idx].push(source);
405                }
406            }
407        }
408
409        // Build reverse CSR
410        for (idx, mut sources) in reverse_adj.into_iter().enumerate() {
411            // Sort sources by their coordinates for deterministic ordering
412            sources.sort_by_key(|&s| {
413                let coord_idx = (s - min_id) as usize;
414                if coord_idx < coords.len() {
415                    let coord = coords[coord_idx];
416                    (coord.row(), coord.col(), s)
417                } else {
418                    (u32::MAX, u32::MAX, s)
419                }
420            });
421
422            reverse_edges.extend(sources.into_iter().map(VertexId));
423            reverse_offsets[idx + 1] = reverse_edges.len() as u32;
424        }
425
426        Self {
427            offsets,
428            edges,
429            reverse_offsets,
430            reverse_edges,
431            min_vertex_id: min_id,
432        }
433    }
434
435    /// Get outgoing edges for a vertex
436    #[inline]
437    pub fn out_edges(&self, v: VertexId) -> &[VertexId] {
438        // Handle empty graph
439        if self.offsets.len() <= 1 {
440            return &[];
441        }
442
443        // Convert vertex ID to offset index
444        if v.0 < self.min_vertex_id {
445            return &[];
446        }
447
448        let idx = (v.0 - self.min_vertex_id) as usize;
449        if idx >= self.offsets.len() - 1 {
450            return &[];
451        }
452
453        let start = self.offsets[idx] as usize;
454        let end = self.offsets[idx + 1] as usize;
455        &self.edges[start..end]
456    }
457
458    /// Get incoming edges for a vertex (who depends on this vertex)
459    #[inline]
460    pub fn in_edges(&self, v: VertexId) -> &[VertexId] {
461        // Handle empty graph
462        if self.reverse_offsets.len() <= 1 {
463            return &[];
464        }
465
466        // Convert vertex ID to offset index
467        if v.0 < self.min_vertex_id {
468            return &[];
469        }
470
471        let idx = (v.0 - self.min_vertex_id) as usize;
472        if idx >= self.reverse_offsets.len() - 1 {
473            return &[];
474        }
475
476        let start = self.reverse_offsets[idx] as usize;
477        let end = self.reverse_offsets[idx + 1] as usize;
478        &self.reverse_edges[start..end]
479    }
480
481    /// Get the out-degree of a vertex
482    #[inline]
483    pub fn out_degree(&self, v: VertexId) -> usize {
484        // Handle empty graph
485        if self.offsets.len() <= 1 {
486            return 0;
487        }
488
489        // Convert vertex ID to offset index
490        if v.0 < self.min_vertex_id {
491            return 0;
492        }
493
494        let idx = (v.0 - self.min_vertex_id) as usize;
495        if idx >= self.offsets.len() - 1 {
496            return 0;
497        }
498
499        let start = self.offsets[idx];
500        let end = self.offsets[idx + 1];
501        (end - start) as usize
502    }
503
504    /// Get the in-degree of a vertex
505    #[inline]
506    pub fn in_degree(&self, v: VertexId) -> usize {
507        self.in_edges(v).len()
508    }
509
510    /// Number of vertices in the graph
511    #[inline]
512    pub fn num_vertices(&self) -> usize {
513        self.offsets.len().saturating_sub(1)
514    }
515
516    /// Total number of edges in the graph
517    #[inline]
518    pub fn num_edges(&self) -> usize {
519        self.edges.len()
520    }
521
522    /// Memory usage in bytes
523    pub fn memory_usage(&self) -> usize {
524        self.offsets.len() * std::mem::size_of::<u32>()
525            + self.edges.len() * std::mem::size_of::<VertexId>()
526            + self.reverse_offsets.len() * std::mem::size_of::<u32>()
527            + self.reverse_edges.len() * std::mem::size_of::<VertexId>()
528    }
529
530    /// Create an empty CSR graph
531    pub fn empty() -> Self {
532        Self {
533            offsets: vec![0],
534            edges: Vec::new(),
535            reverse_offsets: vec![0],
536            reverse_edges: Vec::new(),
537            min_vertex_id: 0,
538        }
539    }
540
541    /// Builder pattern for incremental construction
542    pub fn builder() -> CsrBuilder {
543        CsrBuilder::new()
544    }
545
546    /// Iterate over all vertices and their outgoing edges
547    pub fn iter(&self) -> CsrIterator {
548        CsrIterator {
549            csr: self,
550            current_vertex: 0,
551        }
552    }
553
554    /// Check if the graph has a specific edge
555    pub fn has_edge(&self, from: VertexId, to: VertexId) -> bool {
556        self.out_edges(from).contains(&to)
557    }
558}
559
560/// Iterator over vertices and their edges
561pub struct CsrIterator<'a> {
562    csr: &'a CsrEdges,
563    current_vertex: usize,
564}
565
566impl<'a> Iterator for CsrIterator<'a> {
567    type Item = (VertexId, &'a [VertexId]);
568
569    fn next(&mut self) -> Option<Self::Item> {
570        if self.current_vertex >= self.csr.num_vertices() {
571            return None;
572        }
573
574        let vertex_id = VertexId(self.current_vertex as u32 + self.csr.min_vertex_id);
575        let edges = self.csr.out_edges(vertex_id);
576        self.current_vertex += 1;
577
578        Some((vertex_id, edges))
579    }
580}
581
582/// Builder for incremental CSR construction
583pub struct CsrBuilder {
584    adjacency: Vec<Vec<usize>>,
585    coords: Vec<PackedCoord>,
586}
587
588impl Default for CsrBuilder {
589    fn default() -> Self {
590        Self::new()
591    }
592}
593
594impl CsrBuilder {
595    pub fn new() -> Self {
596        Self {
597            adjacency: Vec::new(),
598            coords: Vec::new(),
599        }
600    }
601
602    /// Add a vertex with its coordinate
603    pub fn add_vertex(&mut self, coord: PackedCoord) -> usize {
604        let idx = self.adjacency.len();
605        self.adjacency.push(Vec::new());
606        self.coords.push(coord);
607        idx
608    }
609
610    /// Add an edge from source to target
611    pub fn add_edge(&mut self, from: usize, to: usize) {
612        if from < self.adjacency.len() {
613            self.adjacency[from].push(to);
614        }
615    }
616
617    /// Build the final CSR structure
618    pub fn build(self) -> CsrEdges {
619        // Convert to (vertex_id, edges) format starting from vertex ID 0
620        let adj: Vec<_> = self
621            .adjacency
622            .into_iter()
623            .enumerate()
624            .map(|(idx, edges)| (idx as u32, edges.into_iter().map(|e| e as u32).collect()))
625            .collect();
626        CsrEdges::from_adjacency(adj, &self.coords)
627    }
628}