vibesql_executor/select/late_materialization/
selection.rs

1//! Selection Vector Implementation
2//!
3//! A selection vector tracks which rows qualify from a source dataset,
4//! enabling late materialization by deferring actual data access.
5
6use std::ops::Range;
7
8/// A selection vector tracks qualifying row indices
9///
10/// This is the core data structure for late materialization. Instead of
11/// copying row data, we track which row indices qualify and only materialize
12/// when needed.
13///
14/// # Memory Layout
15///
16/// Uses `u32` indices (4 bytes each) instead of full rows (potentially
17/// hundreds of bytes each). For a 1M row table filtered to 10K rows:
18/// - Old: 1M × sizeof(Row) = potentially gigabytes
19/// - New: 10K × 4 bytes = 40KB
20///
21/// # Example
22///
23/// ```text
24/// // Create from filter bitmap
25/// let selection = SelectionVector::from_bitmap(&[true, false, true, true, false]);
26/// assert_eq!(selection.len(), 3);
27/// assert_eq!(selection.indices(), &[0, 2, 3]);
28///
29/// // Iterate over qualifying indices
30/// for idx in selection.iter() {
31///     let row = &source_rows[idx as usize];
32///     // Process row...
33/// }
34/// ```
35#[derive(Debug, Clone)]
36pub struct SelectionVector {
37    /// Indices of qualifying rows (sorted, unique)
38    indices: Vec<u32>,
39}
40
41impl SelectionVector {
42    /// Create an empty selection vector
43    #[inline]
44    pub fn empty() -> Self {
45        Self { indices: Vec::new() }
46    }
47
48    /// Create a selection vector that selects all rows in range [0, count)
49    #[inline]
50    pub fn all(count: usize) -> Self {
51        Self { indices: (0..count as u32).collect() }
52    }
53
54    /// Create a selection vector from a range
55    #[inline]
56    pub fn from_range(range: Range<usize>) -> Self {
57        Self { indices: (range.start as u32..range.end as u32).collect() }
58    }
59
60    /// Create from pre-computed indices (assumes sorted, unique)
61    #[inline]
62    pub fn from_indices(indices: Vec<u32>) -> Self {
63        Self { indices }
64    }
65
66    /// Create from a boolean filter bitmap
67    ///
68    /// This is the most common creation path after evaluating predicates.
69    ///
70    /// # Example
71    ///
72    /// ```text
73    /// let bitmap = vec![true, false, true, true, false];
74    /// let selection = SelectionVector::from_bitmap(&bitmap);
75    /// assert_eq!(selection.indices(), &[0, 2, 3]);
76    /// ```
77    pub fn from_bitmap(bitmap: &[bool]) -> Self {
78        let mut indices = Vec::with_capacity(bitmap.len() / 4); // Estimate 25% selectivity
79
80        for (i, &selected) in bitmap.iter().enumerate() {
81            if selected {
82                indices.push(i as u32);
83            }
84        }
85
86        Self { indices }
87    }
88
89    /// Create from a bitmap with pre-allocated capacity hint
90    ///
91    /// Use when you have an estimate of how many rows will match.
92    pub fn from_bitmap_with_capacity(bitmap: &[bool], capacity_hint: usize) -> Self {
93        let mut indices = Vec::with_capacity(capacity_hint);
94
95        for (i, &selected) in bitmap.iter().enumerate() {
96            if selected {
97                indices.push(i as u32);
98            }
99        }
100
101        Self { indices }
102    }
103
104    /// Number of selected rows
105    #[inline]
106    pub fn len(&self) -> usize {
107        self.indices.len()
108    }
109
110    /// Check if no rows are selected
111    #[inline]
112    pub fn is_empty(&self) -> bool {
113        self.indices.is_empty()
114    }
115
116    /// Get the underlying indices slice
117    #[inline]
118    pub fn indices(&self) -> &[u32] {
119        &self.indices
120    }
121
122    /// Consume and return the indices vector
123    #[inline]
124    pub fn into_indices(self) -> Vec<u32> {
125        self.indices
126    }
127
128    /// Get index at position
129    #[inline]
130    pub fn get(&self, pos: usize) -> Option<u32> {
131        self.indices.get(pos).copied()
132    }
133
134    /// Iterate over selected indices
135    #[inline]
136    pub fn iter(&self) -> impl Iterator<Item = u32> + '_ {
137        self.indices.iter().copied()
138    }
139
140    /// Intersect with another selection vector (AND operation)
141    ///
142    /// Returns indices that appear in both selections. Useful for
143    /// combining multiple filter conditions.
144    pub fn intersect(&self, other: &SelectionVector) -> SelectionVector {
145        // Both are sorted, so we can do a merge-style intersection
146        let mut result = Vec::with_capacity(self.len().min(other.len()));
147        let mut i = 0;
148        let mut j = 0;
149
150        while i < self.indices.len() && j < other.indices.len() {
151            match self.indices[i].cmp(&other.indices[j]) {
152                std::cmp::Ordering::Less => i += 1,
153                std::cmp::Ordering::Greater => j += 1,
154                std::cmp::Ordering::Equal => {
155                    result.push(self.indices[i]);
156                    i += 1;
157                    j += 1;
158                }
159            }
160        }
161
162        SelectionVector { indices: result }
163    }
164
165    /// Union with another selection vector (OR operation)
166    ///
167    /// Returns indices that appear in either selection.
168    pub fn union(&self, other: &SelectionVector) -> SelectionVector {
169        // Both are sorted, so we can do a merge-style union
170        let mut result = Vec::with_capacity(self.len() + other.len());
171        let mut i = 0;
172        let mut j = 0;
173
174        while i < self.indices.len() && j < other.indices.len() {
175            match self.indices[i].cmp(&other.indices[j]) {
176                std::cmp::Ordering::Less => {
177                    result.push(self.indices[i]);
178                    i += 1;
179                }
180                std::cmp::Ordering::Greater => {
181                    result.push(other.indices[j]);
182                    j += 1;
183                }
184                std::cmp::Ordering::Equal => {
185                    result.push(self.indices[i]);
186                    i += 1;
187                    j += 1;
188                }
189            }
190        }
191
192        // Append remaining
193        result.extend_from_slice(&self.indices[i..]);
194        result.extend_from_slice(&other.indices[j..]);
195
196        SelectionVector { indices: result }
197    }
198
199    /// Remap indices through another selection
200    ///
201    /// If `self` contains indices [0, 2, 3] and `base` contains [10, 20, 30, 40, 50],
202    /// returns [10, 30, 40] (indices 0, 2, 3 from base).
203    ///
204    /// This is used when chaining operations: if we filter a filtered result,
205    /// we need to map back to original row indices.
206    pub fn remap(&self, base: &SelectionVector) -> SelectionVector {
207        let remapped: Vec<u32> =
208            self.indices.iter().filter_map(|&idx| base.get(idx as usize)).collect();
209
210        SelectionVector { indices: remapped }
211    }
212
213    /// Create a dense bitmap representation
214    ///
215    /// Returns a boolean vector where `result[i] = true` iff `i` is in the selection.
216    /// `total_count` is the total number of rows in the source.
217    pub fn to_bitmap(&self, total_count: usize) -> Vec<bool> {
218        let mut bitmap = vec![false; total_count];
219        for &idx in &self.indices {
220            if (idx as usize) < total_count {
221                bitmap[idx as usize] = true;
222            }
223        }
224        bitmap
225    }
226
227    /// Filter this selection based on a predicate applied to a slice of data
228    ///
229    /// This is useful for applying additional filters without materializing rows.
230    pub fn filter<T, F>(&self, data: &[T], predicate: F) -> SelectionVector
231    where
232        F: Fn(&T) -> bool,
233    {
234        let filtered: Vec<u32> =
235            self.indices.iter().filter(|&&idx| predicate(&data[idx as usize])).copied().collect();
236
237        SelectionVector { indices: filtered }
238    }
239
240    /// Apply a function to each selected index, collecting results
241    pub fn map<T, F>(&self, mut f: F) -> Vec<T>
242    where
243        F: FnMut(u32) -> T,
244    {
245        self.indices.iter().map(|&idx| f(idx)).collect()
246    }
247
248    /// Selectivity ratio (selected / total)
249    ///
250    /// Returns a value between 0.0 and 1.0 indicating what fraction
251    /// of the total rows are selected.
252    #[inline]
253    pub fn selectivity(&self, total_count: usize) -> f64 {
254        if total_count == 0 {
255            0.0
256        } else {
257            self.len() as f64 / total_count as f64
258        }
259    }
260
261    /// Compact representation check
262    ///
263    /// Returns true if this selection represents a contiguous range,
264    /// which allows for more efficient processing.
265    pub fn is_contiguous(&self) -> bool {
266        if self.indices.len() <= 1 {
267            return true;
268        }
269
270        let first = self.indices[0];
271        let last = self.indices[self.indices.len() - 1];
272        (last - first + 1) as usize == self.indices.len()
273    }
274
275    /// Get the range if this selection is contiguous
276    pub fn as_range(&self) -> Option<Range<usize>> {
277        if self.is_contiguous() && !self.indices.is_empty() {
278            Some(self.indices[0] as usize..self.indices[self.indices.len() - 1] as usize + 1)
279        } else {
280            None
281        }
282    }
283}
284
285impl Default for SelectionVector {
286    fn default() -> Self {
287        Self::empty()
288    }
289}
290
291impl From<Vec<u32>> for SelectionVector {
292    fn from(indices: Vec<u32>) -> Self {
293        Self::from_indices(indices)
294    }
295}
296
297impl From<Vec<usize>> for SelectionVector {
298    fn from(indices: Vec<usize>) -> Self {
299        Self::from_indices(indices.into_iter().map(|i| i as u32).collect())
300    }
301}
302
303impl IntoIterator for SelectionVector {
304    type Item = u32;
305    type IntoIter = std::vec::IntoIter<u32>;
306
307    fn into_iter(self) -> Self::IntoIter {
308        self.indices.into_iter()
309    }
310}
311
312impl<'a> IntoIterator for &'a SelectionVector {
313    type Item = &'a u32;
314    type IntoIter = std::slice::Iter<'a, u32>;
315
316    fn into_iter(self) -> Self::IntoIter {
317        self.indices.iter()
318    }
319}
320
321#[cfg(test)]
322mod selection_tests {
323    use super::*;
324
325    #[test]
326    fn test_from_bitmap() {
327        let bitmap = vec![true, false, true, true, false, true];
328        let selection = SelectionVector::from_bitmap(&bitmap);
329
330        assert_eq!(selection.len(), 4);
331        assert_eq!(selection.indices(), &[0, 2, 3, 5]);
332    }
333
334    #[test]
335    fn test_empty() {
336        let selection = SelectionVector::empty();
337        assert!(selection.is_empty());
338        assert_eq!(selection.len(), 0);
339    }
340
341    #[test]
342    fn test_all() {
343        let selection = SelectionVector::all(5);
344        assert_eq!(selection.len(), 5);
345        assert_eq!(selection.indices(), &[0, 1, 2, 3, 4]);
346    }
347
348    #[test]
349    fn test_intersect() {
350        let a = SelectionVector::from_indices(vec![1, 2, 4, 6, 8]);
351        let b = SelectionVector::from_indices(vec![2, 3, 4, 5, 6]);
352
353        let result = a.intersect(&b);
354        assert_eq!(result.indices(), &[2, 4, 6]);
355    }
356
357    #[test]
358    fn test_union() {
359        let a = SelectionVector::from_indices(vec![1, 3, 5]);
360        let b = SelectionVector::from_indices(vec![2, 3, 4]);
361
362        let result = a.union(&b);
363        assert_eq!(result.indices(), &[1, 2, 3, 4, 5]);
364    }
365
366    #[test]
367    fn test_remap() {
368        let base = SelectionVector::from_indices(vec![10, 20, 30, 40, 50]);
369        let child = SelectionVector::from_indices(vec![0, 2, 4]); // Select indices 0, 2, 4 from base
370
371        let remapped = child.remap(&base);
372        assert_eq!(remapped.indices(), &[10, 30, 50]);
373    }
374
375    #[test]
376    fn test_to_bitmap() {
377        let selection = SelectionVector::from_indices(vec![1, 3, 4]);
378        let bitmap = selection.to_bitmap(6);
379
380        assert_eq!(bitmap, vec![false, true, false, true, true, false]);
381    }
382
383    #[test]
384    fn test_is_contiguous() {
385        let contiguous = SelectionVector::from_indices(vec![2, 3, 4, 5]);
386        let non_contiguous = SelectionVector::from_indices(vec![2, 4, 6]);
387
388        assert!(contiguous.is_contiguous());
389        assert!(!non_contiguous.is_contiguous());
390    }
391
392    #[test]
393    fn test_as_range() {
394        let contiguous = SelectionVector::from_indices(vec![2, 3, 4, 5]);
395        assert_eq!(contiguous.as_range(), Some(2..6));
396
397        let non_contiguous = SelectionVector::from_indices(vec![2, 4, 6]);
398        assert_eq!(non_contiguous.as_range(), None);
399    }
400
401    #[test]
402    fn test_selectivity() {
403        let selection = SelectionVector::from_indices(vec![0, 5, 10]);
404        assert!((selection.selectivity(100) - 0.03).abs() < 0.001);
405        assert!((selection.selectivity(30) - 0.1).abs() < 0.001);
406    }
407
408    #[test]
409    fn test_filter() {
410        let data = vec![10, 20, 30, 40, 50];
411        let selection = SelectionVector::from_indices(vec![0, 2, 4]);
412
413        let filtered = selection.filter(&data, |&x| x > 20);
414        assert_eq!(filtered.indices(), &[2, 4]); // indices where data > 20
415    }
416}