formualizer_eval/engine/
sheet_index.rs

1use super::interval_tree::IntervalTree;
2use super::packed_coord::PackedCoord;
3use super::vertex::VertexId;
4use std::collections::HashSet;
5
6/// Sheet-level sparse index for efficient range queries on vertex positions.
7///
8/// ## Why SheetIndex with interval trees?
9///
10/// While `cell_to_vertex: HashMap<CellRef, VertexId>` provides O(1) exact lookups,
11/// structural operations (insert/delete rows/columns) need to find ALL vertices
12/// in a given range, which would require O(n) full scans of the hash map.
13///
14/// ### Performance comparison:
15///
16/// | Operation | Hash map only | With SheetIndex |
17/// |-----------|---------------|-----------------|
18/// | Insert 100 rows at row 20,000 | O(total cells) | O(log n + k)* |
19/// | Delete columns B:D | O(total cells) | O(log n + k) |
20/// | Viewport query (visible cells) | O(total cells) | O(log n + k) |
21///
22/// *where n = number of indexed vertices, k = vertices actually affected
23///
24/// ### Memory efficiency:
25///
26/// - Each interval is just 2×u32 + Vec<VertexId> pointer
27/// - Spreadsheets are extremely sparse (1M row sheet typically has <10K cells)
28/// - Point intervals (single cells) are the common case
29/// - Trees stay small and cache-friendly
30///
31/// ### Future benefits:
32///
33/// 1. **Virtual scrolling** - fetch viewport cells in microseconds
34/// 2. **Lazy evaluation** - mark row blocks dirty without scanning
35/// 3. **Concurrent reads** - trees are read-mostly, perfect for RwLock
36/// 4. **Minimal undo/redo** - know exactly which vertices were touched
37#[derive(Debug, Default)]
38pub struct SheetIndex {
39    /// Row interval tree: maps row ranges → vertices in those rows
40    /// For a cell at (r,c), we store the point interval [r,r] → VertexId
41    row_tree: IntervalTree<VertexId>,
42
43    /// Column interval tree: maps column ranges → vertices in those columns  
44    /// For a cell at (r,c), we store the point interval [c,c] → VertexId
45    col_tree: IntervalTree<VertexId>,
46}
47
48impl SheetIndex {
49    /// Create a new empty sheet index
50    pub fn new() -> Self {
51        Self {
52            row_tree: IntervalTree::new(),
53            col_tree: IntervalTree::new(),
54        }
55    }
56
57    /// Fast path build from sorted coordinates. Assumes items are row-major sorted.
58    pub fn build_from_sorted(&mut self, items: &[(PackedCoord, VertexId)]) {
59        self.add_vertices_batch(items);
60    }
61
62    /// Add a vertex at the given coordinate to the index.
63    ///
64    /// ## Complexity
65    /// O(log n) where n is the number of vertices in the index
66    pub fn add_vertex(&mut self, coord: PackedCoord, vertex_id: VertexId) {
67        let row = coord.row();
68        let col = coord.col();
69
70        // Add to row tree - point interval [row, row]
71        self.row_tree
72            .entry(row, row)
73            .or_insert_with(HashSet::new)
74            .insert(vertex_id);
75
76        // Add to column tree - point interval [col, col]
77        self.col_tree
78            .entry(col, col)
79            .or_insert_with(HashSet::new)
80            .insert(vertex_id);
81    }
82
83    /// Add many vertices in a single pass. Assumes coords belong to same sheet index.
84    pub fn add_vertices_batch(&mut self, items: &[(PackedCoord, VertexId)]) {
85        if items.is_empty() {
86            return;
87        }
88        // If trees are empty we can bulk build from sorted points in O(n log n) with better constants.
89        if self.row_tree.is_empty() && self.col_tree.is_empty() {
90            // Build row points
91            let mut row_items: Vec<(u32, HashSet<VertexId>)> = Vec::with_capacity(items.len());
92            let mut col_items: Vec<(u32, HashSet<VertexId>)> = Vec::with_capacity(items.len());
93            // Use temp hash maps for merging duplicates
94            use rustc_hash::FxHashMap;
95            let mut row_map: FxHashMap<u32, HashSet<VertexId>> = FxHashMap::default();
96            let mut col_map: FxHashMap<u32, HashSet<VertexId>> = FxHashMap::default();
97            for (coord, vid) in items {
98                row_map.entry(coord.row()).or_default().insert(*vid);
99                col_map.entry(coord.col()).or_default().insert(*vid);
100            }
101            row_items.reserve(row_map.len());
102            for (k, v) in row_map.into_iter() {
103                row_items.push((k, v));
104            }
105            col_items.reserve(col_map.len());
106            for (k, v) in col_map.into_iter() {
107                col_items.push((k, v));
108            }
109            self.row_tree.bulk_build_points(row_items);
110            self.col_tree.bulk_build_points(col_items);
111            return;
112        }
113        // Fallback: incremental for already populated index
114        for (coord, vid) in items {
115            let row = coord.row();
116            let col = coord.col();
117            self.row_tree
118                .entry(row, row)
119                .or_insert_with(HashSet::new)
120                .insert(*vid);
121            self.col_tree
122                .entry(col, col)
123                .or_insert_with(HashSet::new)
124                .insert(*vid);
125        }
126    }
127
128    /// Remove a vertex from the index.
129    ///
130    /// ## Complexity
131    /// O(log n) where n is the number of vertices in the index
132    pub fn remove_vertex(&mut self, coord: PackedCoord, vertex_id: VertexId) {
133        let row = coord.row();
134        let col = coord.col();
135
136        // Remove from row tree
137        if let Some(vertices) = self.row_tree.get_mut(row, row) {
138            vertices.remove(&vertex_id);
139            // Note: We could remove empty intervals, but keeping them is fine
140            // as they take minimal space and may be reused
141        }
142
143        // Remove from column tree
144        if let Some(vertices) = self.col_tree.get_mut(col, col) {
145            vertices.remove(&vertex_id);
146        }
147    }
148
149    /// Update a vertex's position in the index (move operation).
150    ///
151    /// ## Complexity
152    /// O(log n) for removal + O(log n) for insertion = O(log n)
153    pub fn update_vertex(
154        &mut self,
155        old_coord: PackedCoord,
156        new_coord: PackedCoord,
157        vertex_id: VertexId,
158    ) {
159        self.remove_vertex(old_coord, vertex_id);
160        self.add_vertex(new_coord, vertex_id);
161    }
162
163    /// Query all vertices in the given row range.
164    ///
165    /// ## Complexity
166    /// O(log n + k) where k is the number of vertices in the range
167    pub fn vertices_in_row_range(&self, start: u32, end: u32) -> Vec<VertexId> {
168        let mut result = HashSet::new();
169
170        for (_low, _high, values) in self.row_tree.query(start, end) {
171            result.extend(values.iter());
172        }
173
174        result.into_iter().collect()
175    }
176
177    /// Query all vertices in the given column range.
178    ///
179    /// ## Complexity
180    /// O(log n + k) where k is the number of vertices in the range
181    pub fn vertices_in_col_range(&self, start: u32, end: u32) -> Vec<VertexId> {
182        let mut result = HashSet::new();
183
184        for (_low, _high, values) in self.col_tree.query(start, end) {
185            result.extend(values.iter());
186        }
187
188        result.into_iter().collect()
189    }
190
191    /// Query all vertices in a rectangular range.
192    ///
193    /// ## Complexity
194    /// O(log n + k) where k is the number of vertices in the range
195    pub fn vertices_in_rect(
196        &self,
197        start_row: u32,
198        end_row: u32,
199        start_col: u32,
200        end_col: u32,
201    ) -> Vec<VertexId> {
202        // Get vertices in row range
203        let row_vertices: HashSet<_> = self
204            .vertices_in_row_range(start_row, end_row)
205            .into_iter()
206            .collect();
207
208        // Get vertices in column range
209        let col_vertices: HashSet<_> = self
210            .vertices_in_col_range(start_col, end_col)
211            .into_iter()
212            .collect();
213
214        // Return intersection - vertices that are in both ranges
215        row_vertices.intersection(&col_vertices).copied().collect()
216    }
217
218    /// Get the total number of indexed vertices (may count duplicates if vertex appears at multiple positions).
219    pub fn len(&self) -> usize {
220        let mut unique: HashSet<VertexId> = HashSet::new();
221
222        // Collect all unique vertices from row tree
223        for (_low, _high, values) in self.row_tree.query(0, u32::MAX) {
224            unique.extend(values.iter());
225        }
226
227        unique.len()
228    }
229
230    /// Check if the index is empty.
231    pub fn is_empty(&self) -> bool {
232        self.row_tree.is_empty()
233    }
234
235    /// Clear all entries from the index.
236    pub fn clear(&mut self) {
237        self.row_tree = IntervalTree::new();
238        self.col_tree = IntervalTree::new();
239    }
240}
241
242#[cfg(test)]
243mod tests {
244    use super::*;
245
246    #[test]
247    fn test_add_and_query_single_vertex() {
248        let mut index = SheetIndex::new();
249        let coord = PackedCoord::new(5, 10);
250        let vertex_id = VertexId(1024);
251
252        index.add_vertex(coord, vertex_id);
253
254        // Query exact row
255        let row_results = index.vertices_in_row_range(5, 5);
256        assert_eq!(row_results, vec![vertex_id]);
257
258        // Query exact column
259        let col_results = index.vertices_in_col_range(10, 10);
260        assert_eq!(col_results, vec![vertex_id]);
261
262        // Query range containing the vertex
263        let row_results = index.vertices_in_row_range(3, 7);
264        assert_eq!(row_results, vec![vertex_id]);
265    }
266
267    #[test]
268    fn test_remove_vertex() {
269        let mut index = SheetIndex::new();
270        let coord = PackedCoord::new(5, 10);
271        let vertex_id = VertexId(1024);
272
273        index.add_vertex(coord, vertex_id);
274        assert_eq!(index.len(), 1);
275
276        index.remove_vertex(coord, vertex_id);
277        assert_eq!(index.len(), 0);
278
279        // Should return empty after removal
280        let row_results = index.vertices_in_row_range(5, 5);
281        assert!(row_results.is_empty());
282    }
283
284    #[test]
285    fn test_update_vertex_position() {
286        let mut index = SheetIndex::new();
287        let old_coord = PackedCoord::new(5, 10);
288        let new_coord = PackedCoord::new(15, 20);
289        let vertex_id = VertexId(1024);
290
291        index.add_vertex(old_coord, vertex_id);
292        index.update_vertex(old_coord, new_coord, vertex_id);
293
294        // Should not be at old position
295        let old_row_results = index.vertices_in_row_range(5, 5);
296        assert!(old_row_results.is_empty());
297
298        // Should be at new position
299        let new_row_results = index.vertices_in_row_range(15, 15);
300        assert_eq!(new_row_results, vec![vertex_id]);
301
302        let new_col_results = index.vertices_in_col_range(20, 20);
303        assert_eq!(new_col_results, vec![vertex_id]);
304    }
305
306    #[test]
307    fn test_range_queries() {
308        let mut index = SheetIndex::new();
309
310        // Add vertices in a pattern
311        for row in 0..10 {
312            for col in 0..5 {
313                let coord = PackedCoord::new(row, col);
314                let vertex_id = VertexId(1024 + row * 5 + col);
315                index.add_vertex(coord, vertex_id);
316            }
317        }
318
319        // Query rows 3-5 (should get 3 rows × 5 cols = 15 vertices)
320        let row_results = index.vertices_in_row_range(3, 5);
321        assert_eq!(row_results.len(), 15);
322
323        // Query columns 1-2 (should get 10 rows × 2 cols = 20 vertices)
324        let col_results = index.vertices_in_col_range(1, 2);
325        assert_eq!(col_results.len(), 20);
326
327        // Query rectangle (rows 3-5, cols 1-2) should get 3 × 2 = 6 vertices
328        let rect_results = index.vertices_in_rect(3, 5, 1, 2);
329        assert_eq!(rect_results.len(), 6);
330    }
331
332    #[test]
333    fn test_sparse_sheet_efficiency() {
334        let mut index = SheetIndex::new();
335
336        // Simulate sparse sheet - only a few cells in a million-row range
337        index.add_vertex(PackedCoord::new(100, 5), VertexId(1024));
338        index.add_vertex(PackedCoord::new(50_000, 10), VertexId(1025));
339        index.add_vertex(PackedCoord::new(100_000, 15), VertexId(1026));
340        index.add_vertex(PackedCoord::new(500_000, 20), VertexId(1027));
341        index.add_vertex(PackedCoord::new(999_999, 25), VertexId(1028));
342
343        assert_eq!(index.len(), 5);
344
345        // Query for rows >= 100_000 (should find 3 vertices efficiently)
346        let high_rows = index.vertices_in_row_range(100_000, u32::MAX);
347        assert_eq!(high_rows.len(), 3);
348
349        // Query for specific column range
350        let col_range = index.vertices_in_col_range(10, 20);
351        assert_eq!(col_range.len(), 3); // columns 10, 15, 20
352    }
353
354    #[test]
355    fn test_shift_operation_query() {
356        let mut index = SheetIndex::new();
357
358        // Setup: cells at rows 10, 20, 30, 40, 50
359        for row in [10, 20, 30, 40, 50] {
360            index.add_vertex(PackedCoord::new(row, 0), VertexId(1024 + row));
361        }
362
363        // Simulate "insert 5 rows at row 25" - need to find all vertices with row >= 25
364        let vertices_to_shift = index.vertices_in_row_range(25, u32::MAX);
365        assert_eq!(vertices_to_shift.len(), 3); // rows 30, 40, 50
366
367        // Simulate "delete columns B:D" - need to find all vertices in columns 1-3
368        for col in 1..=3 {
369            index.add_vertex(PackedCoord::new(5, col), VertexId(2000 + col));
370        }
371
372        let vertices_to_delete = index.vertices_in_col_range(1, 3);
373        assert_eq!(vertices_to_delete.len(), 3);
374    }
375
376    #[test]
377    fn test_viewport_query() {
378        let mut index = SheetIndex::new();
379
380        // Simulate a spreadsheet with scattered data
381        for row in (0..10000).step_by(100) {
382            for col in 0..10 {
383                index.add_vertex(PackedCoord::new(row, col), VertexId(row * 10 + col));
384            }
385        }
386
387        // Query viewport: rows 500-1500, columns 2-7
388        let viewport = index.vertices_in_rect(500, 1500, 2, 7);
389
390        // Should find 11 rows (500, 600, ..., 1500) × 6 columns (2-7) = 66 vertices
391        assert_eq!(viewport.len(), 66);
392    }
393}