formualizer_eval/engine/
sheet_index.rs

1use super::interval_tree::IntervalTree;
2use super::vertex::VertexId;
3use formualizer_common::Coord as AbsCoord;
4use std::collections::HashSet;
5
6/// Sheet-level sparse index for efficient range queries on vertex positions.
7///
8/// ## Why SheetIndex with interval trees?
9///
10/// While `cell_to_vertex: HashMap<CellRef, VertexId>` provides O(1) exact lookups,
11/// structural operations (insert/delete rows/columns) need to find ALL vertices
12/// in a given range, which would require O(n) full scans of the hash map.
13///
14/// ### Performance comparison:
15///
16/// | Operation | Hash map only | With SheetIndex |
17/// |-----------|---------------|-----------------|
18/// | Insert 100 rows at row 20,000 | O(total cells) | O(log n + k)* |
19/// | Delete columns B:D | O(total cells) | O(log n + k) |
20/// | Viewport query (visible cells) | O(total cells) | O(log n + k) |
21///
22/// *where n = number of indexed vertices, k = vertices actually affected
23///
24/// ### Memory efficiency:
25///
26/// - Each interval is just 2×u32 + Vec<VertexId> pointer
27/// - Spreadsheets are extremely sparse (1M row sheet typically has <10K cells)
28/// - Point intervals (single cells) are the common case
29/// - Trees stay small and cache-friendly
30///
31/// ### Future benefits:
32///
33/// 1. **Virtual scrolling** - fetch viewport cells in microseconds
34/// 2. **Lazy evaluation** - mark row blocks dirty without scanning
35/// 3. **Concurrent reads** - trees are read-mostly, perfect for RwLock
36/// 4. **Minimal undo/redo** - know exactly which vertices were touched
37#[derive(Debug, Default)]
38pub struct SheetIndex {
39    /// Row interval tree: maps row ranges → vertices in those rows
40    /// For a cell at (r,c), we store the point interval [r,r] → VertexId
41    row_tree: IntervalTree<VertexId>,
42
43    /// Column interval tree: maps column ranges → vertices in those columns  
44    /// For a cell at (r,c), we store the point interval [c,c] → VertexId
45    col_tree: IntervalTree<VertexId>,
46}
47
48impl SheetIndex {
49    /// Create a new empty sheet index
50    pub fn new() -> Self {
51        Self {
52            row_tree: IntervalTree::new(),
53            col_tree: IntervalTree::new(),
54        }
55    }
56
57    /// Fast path build from sorted coordinates. Assumes items are row-major sorted.
58    pub fn build_from_sorted(&mut self, items: &[(AbsCoord, VertexId)]) {
59        self.add_vertices_batch(items);
60    }
61
62    /// Add a vertex at the given coordinate to the index.
63    ///
64    /// ## Complexity
65    /// O(log n) where n is the number of vertices in the index
66    pub fn add_vertex(&mut self, coord: AbsCoord, vertex_id: VertexId) {
67        let row = coord.row();
68        let col = coord.col();
69
70        // Add to row tree - point interval [row, row]
71        self.row_tree
72            .entry(row, row)
73            .or_insert_with(HashSet::new)
74            .insert(vertex_id);
75
76        // Add to column tree - point interval [col, col]
77        self.col_tree
78            .entry(col, col)
79            .or_insert_with(HashSet::new)
80            .insert(vertex_id);
81    }
82
83    /// Add many vertices in a single pass. Assumes coords belong to same sheet index.
84    pub fn add_vertices_batch(&mut self, items: &[(AbsCoord, VertexId)]) {
85        if items.is_empty() {
86            return;
87        }
88        // If trees are empty we can bulk build from sorted points in O(n log n) with better constants.
89        if self.row_tree.is_empty() && self.col_tree.is_empty() {
90            // Build row points
91            let mut row_items: Vec<(u32, HashSet<VertexId>)> = Vec::with_capacity(items.len());
92            let mut col_items: Vec<(u32, HashSet<VertexId>)> = Vec::with_capacity(items.len());
93            // Use temp hash maps for merging duplicates
94            use rustc_hash::FxHashMap;
95            let mut row_map: FxHashMap<u32, HashSet<VertexId>> = FxHashMap::default();
96            let mut col_map: FxHashMap<u32, HashSet<VertexId>> = FxHashMap::default();
97            for (coord, vid) in items {
98                row_map.entry(coord.row()).or_default().insert(*vid);
99                col_map.entry(coord.col()).or_default().insert(*vid);
100            }
101            row_items.reserve(row_map.len());
102            for (k, v) in row_map.into_iter() {
103                row_items.push((k, v));
104            }
105            col_items.reserve(col_map.len());
106            for (k, v) in col_map.into_iter() {
107                col_items.push((k, v));
108            }
109            self.row_tree.bulk_build_points(row_items);
110            self.col_tree.bulk_build_points(col_items);
111            return;
112        }
113        // Fallback: incremental for already populated index
114        for (coord, vid) in items {
115            let row = coord.row();
116            let col = coord.col();
117            self.row_tree
118                .entry(row, row)
119                .or_insert_with(HashSet::new)
120                .insert(*vid);
121            self.col_tree
122                .entry(col, col)
123                .or_insert_with(HashSet::new)
124                .insert(*vid);
125        }
126    }
127
128    /// Remove a vertex from the index.
129    ///
130    /// ## Complexity
131    /// O(log n) where n is the number of vertices in the index
132    pub fn remove_vertex(&mut self, coord: AbsCoord, vertex_id: VertexId) {
133        let row = coord.row();
134        let col = coord.col();
135
136        // Remove from row tree
137        if let Some(vertices) = self.row_tree.get_mut(row, row) {
138            vertices.remove(&vertex_id);
139            // Note: We could remove empty intervals, but keeping them is fine
140            // as they take minimal space and may be reused
141        }
142
143        // Remove from column tree
144        if let Some(vertices) = self.col_tree.get_mut(col, col) {
145            vertices.remove(&vertex_id);
146        }
147    }
148
149    /// Update a vertex's position in the index (move operation).
150    ///
151    /// ## Complexity
152    /// O(log n) for removal + O(log n) for insertion = O(log n)
153    pub fn update_vertex(&mut self, old_coord: AbsCoord, new_coord: AbsCoord, vertex_id: VertexId) {
154        self.remove_vertex(old_coord, vertex_id);
155        self.add_vertex(new_coord, vertex_id);
156    }
157
158    /// Query all vertices in the given row range.
159    ///
160    /// ## Complexity
161    /// O(log n + k) where k is the number of vertices in the range
162    pub fn vertices_in_row_range(&self, start: u32, end: u32) -> Vec<VertexId> {
163        let mut result = HashSet::new();
164
165        for (_low, _high, values) in self.row_tree.query(start, end) {
166            result.extend(values.iter());
167        }
168
169        result.into_iter().collect()
170    }
171
172    /// Query all vertices in the given column range.
173    ///
174    /// ## Complexity
175    /// O(log n + k) where k is the number of vertices in the range
176    pub fn vertices_in_col_range(&self, start: u32, end: u32) -> Vec<VertexId> {
177        let mut result = HashSet::new();
178
179        for (_low, _high, values) in self.col_tree.query(start, end) {
180            result.extend(values.iter());
181        }
182
183        result.into_iter().collect()
184    }
185
186    /// Query all vertices in a rectangular range.
187    ///
188    /// ## Complexity
189    /// O(log n + k) where k is the number of vertices in the range
190    pub fn vertices_in_rect(
191        &self,
192        start_row: u32,
193        end_row: u32,
194        start_col: u32,
195        end_col: u32,
196    ) -> Vec<VertexId> {
197        // Get vertices in row range
198        let row_vertices: HashSet<_> = self
199            .vertices_in_row_range(start_row, end_row)
200            .into_iter()
201            .collect();
202
203        // Get vertices in column range
204        let col_vertices: HashSet<_> = self
205            .vertices_in_col_range(start_col, end_col)
206            .into_iter()
207            .collect();
208
209        // Return intersection - vertices that are in both ranges
210        row_vertices.intersection(&col_vertices).copied().collect()
211    }
212
213    /// Get the total number of indexed vertices (may count duplicates if vertex appears at multiple positions).
214    pub fn len(&self) -> usize {
215        let mut unique: HashSet<VertexId> = HashSet::new();
216
217        // Collect all unique vertices from row tree
218        for (_low, _high, values) in self.row_tree.query(0, u32::MAX) {
219            unique.extend(values.iter());
220        }
221
222        unique.len()
223    }
224
225    /// Check if the index is empty.
226    pub fn is_empty(&self) -> bool {
227        self.row_tree.is_empty()
228    }
229
230    /// Clear all entries from the index.
231    pub fn clear(&mut self) {
232        self.row_tree = IntervalTree::new();
233        self.col_tree = IntervalTree::new();
234    }
235}
236
237#[cfg(test)]
238mod tests {
239    use super::*;
240
241    #[test]
242    fn test_add_and_query_single_vertex() {
243        let mut index = SheetIndex::new();
244        let coord = AbsCoord::new(5, 10);
245        let vertex_id = VertexId(1024);
246
247        index.add_vertex(coord, vertex_id);
248
249        // Query exact row
250        let row_results = index.vertices_in_row_range(5, 5);
251        assert_eq!(row_results, vec![vertex_id]);
252
253        // Query exact column
254        let col_results = index.vertices_in_col_range(10, 10);
255        assert_eq!(col_results, vec![vertex_id]);
256
257        // Query range containing the vertex
258        let row_results = index.vertices_in_row_range(3, 7);
259        assert_eq!(row_results, vec![vertex_id]);
260    }
261
262    #[test]
263    fn test_remove_vertex() {
264        let mut index = SheetIndex::new();
265        let coord = AbsCoord::new(5, 10);
266        let vertex_id = VertexId(1024);
267
268        index.add_vertex(coord, vertex_id);
269        assert_eq!(index.len(), 1);
270
271        index.remove_vertex(coord, vertex_id);
272        assert_eq!(index.len(), 0);
273
274        // Should return empty after removal
275        let row_results = index.vertices_in_row_range(5, 5);
276        assert!(row_results.is_empty());
277    }
278
279    #[test]
280    fn test_update_vertex_position() {
281        let mut index = SheetIndex::new();
282        let old_coord = AbsCoord::new(5, 10);
283        let new_coord = AbsCoord::new(15, 20);
284        let vertex_id = VertexId(1024);
285
286        index.add_vertex(old_coord, vertex_id);
287        index.update_vertex(old_coord, new_coord, vertex_id);
288
289        // Should not be at old position
290        let old_row_results = index.vertices_in_row_range(5, 5);
291        assert!(old_row_results.is_empty());
292
293        // Should be at new position
294        let new_row_results = index.vertices_in_row_range(15, 15);
295        assert_eq!(new_row_results, vec![vertex_id]);
296
297        let new_col_results = index.vertices_in_col_range(20, 20);
298        assert_eq!(new_col_results, vec![vertex_id]);
299    }
300
301    #[test]
302    fn test_range_queries() {
303        let mut index = SheetIndex::new();
304
305        // Add vertices in a pattern
306        for row in 0..10 {
307            for col in 0..5 {
308                let coord = AbsCoord::new(row, col);
309                let vertex_id = VertexId(1024 + row * 5 + col);
310                index.add_vertex(coord, vertex_id);
311            }
312        }
313
314        // Query rows 3-5 (should get 3 rows × 5 cols = 15 vertices)
315        let row_results = index.vertices_in_row_range(3, 5);
316        assert_eq!(row_results.len(), 15);
317
318        // Query columns 1-2 (should get 10 rows × 2 cols = 20 vertices)
319        let col_results = index.vertices_in_col_range(1, 2);
320        assert_eq!(col_results.len(), 20);
321
322        // Query rectangle (rows 3-5, cols 1-2) should get 3 × 2 = 6 vertices
323        let rect_results = index.vertices_in_rect(3, 5, 1, 2);
324        assert_eq!(rect_results.len(), 6);
325    }
326
327    #[test]
328    fn test_sparse_sheet_efficiency() {
329        let mut index = SheetIndex::new();
330
331        // Simulate sparse sheet - only a few cells in a million-row range
332        index.add_vertex(AbsCoord::new(100, 5), VertexId(1024));
333        index.add_vertex(AbsCoord::new(50_000, 10), VertexId(1025));
334        index.add_vertex(AbsCoord::new(100_000, 15), VertexId(1026));
335        index.add_vertex(AbsCoord::new(500_000, 20), VertexId(1027));
336        index.add_vertex(AbsCoord::new(999_999, 25), VertexId(1028));
337
338        assert_eq!(index.len(), 5);
339
340        // Query for rows >= 100_000 (should find 3 vertices efficiently)
341        let high_rows = index.vertices_in_row_range(100_000, u32::MAX);
342        assert_eq!(high_rows.len(), 3);
343
344        // Query for specific column range
345        let col_range = index.vertices_in_col_range(10, 20);
346        assert_eq!(col_range.len(), 3); // columns 10, 15, 20
347    }
348
349    #[test]
350    fn test_shift_operation_query() {
351        let mut index = SheetIndex::new();
352
353        // Setup: cells at rows 10, 20, 30, 40, 50
354        for row in [10, 20, 30, 40, 50] {
355            index.add_vertex(AbsCoord::new(row, 0), VertexId(1024 + row));
356        }
357
358        // Simulate "insert 5 rows at row 25" - need to find all vertices with row >= 25
359        let vertices_to_shift = index.vertices_in_row_range(25, u32::MAX);
360        assert_eq!(vertices_to_shift.len(), 3); // rows 30, 40, 50
361
362        // Simulate "delete columns B:D" - need to find all vertices in columns 1-3
363        for col in 1..=3 {
364            index.add_vertex(AbsCoord::new(5, col), VertexId(2000 + col));
365        }
366
367        let vertices_to_delete = index.vertices_in_col_range(1, 3);
368        assert_eq!(vertices_to_delete.len(), 3);
369    }
370
371    #[test]
372    fn test_viewport_query() {
373        let mut index = SheetIndex::new();
374
375        // Simulate a spreadsheet with scattered data
376        for row in (0..10000).step_by(100) {
377            for col in 0..10 {
378                index.add_vertex(AbsCoord::new(row, col), VertexId(row * 10 + col));
379            }
380        }
381
382        // Query viewport: rows 500-1500, columns 2-7
383        let viewport = index.vertices_in_rect(500, 1500, 2, 7);
384
385        // Should find 11 rows (500, 600, ..., 1500) × 6 columns (2-7) = 66 vertices
386        assert_eq!(viewport.len(), 66);
387    }
388}