vibesql_executor/select/late_materialization/
selection.rs

1//! Selection Vector Implementation
2//!
3//! A selection vector tracks which rows qualify from a source dataset,
4//! enabling late materialization by deferring actual data access.
5
6use std::ops::Range;
7
8/// A selection vector tracks qualifying row indices
9///
10/// This is the core data structure for late materialization. Instead of
11/// copying row data, we track which row indices qualify and only materialize
12/// when needed.
13///
14/// # Memory Layout
15///
16/// Uses `u32` indices (4 bytes each) instead of full rows (potentially
17/// hundreds of bytes each). For a 1M row table filtered to 10K rows:
18/// - Old: 1M × sizeof(Row) = potentially gigabytes
19/// - New: 10K × 4 bytes = 40KB
20///
21/// # Example
22///
23/// ```text
24/// // Create from filter bitmap
25/// let selection = SelectionVector::from_bitmap(&[true, false, true, true, false]);
26/// assert_eq!(selection.len(), 3);
27/// assert_eq!(selection.indices(), &[0, 2, 3]);
28///
29/// // Iterate over qualifying indices
30/// for idx in selection.iter() {
31///     let row = &source_rows[idx as usize];
32///     // Process row...
33/// }
34/// ```
35#[derive(Debug, Clone)]
36pub struct SelectionVector {
37    /// Indices of qualifying rows (sorted, unique)
38    indices: Vec<u32>,
39}
40
41impl SelectionVector {
42    /// Create an empty selection vector
43    #[inline]
44    pub fn empty() -> Self {
45        Self { indices: Vec::new() }
46    }
47
48    /// Create a selection vector that selects all rows in range [0, count)
49    #[inline]
50    pub fn all(count: usize) -> Self {
51        Self {
52            indices: (0..count as u32).collect(),
53        }
54    }
55
56    /// Create a selection vector from a range
57    #[inline]
58    pub fn from_range(range: Range<usize>) -> Self {
59        Self {
60            indices: (range.start as u32..range.end as u32).collect(),
61        }
62    }
63
64    /// Create from pre-computed indices (assumes sorted, unique)
65    #[inline]
66    pub fn from_indices(indices: Vec<u32>) -> Self {
67        Self { indices }
68    }
69
70    /// Create from a boolean filter bitmap
71    ///
72    /// This is the most common creation path after evaluating predicates.
73    ///
74    /// # Example
75    ///
76    /// ```text
77    /// let bitmap = vec![true, false, true, true, false];
78    /// let selection = SelectionVector::from_bitmap(&bitmap);
79    /// assert_eq!(selection.indices(), &[0, 2, 3]);
80    /// ```
81    pub fn from_bitmap(bitmap: &[bool]) -> Self {
82        let mut indices = Vec::with_capacity(bitmap.len() / 4); // Estimate 25% selectivity
83
84        for (i, &selected) in bitmap.iter().enumerate() {
85            if selected {
86                indices.push(i as u32);
87            }
88        }
89
90        Self { indices }
91    }
92
93    /// Create from a bitmap with pre-allocated capacity hint
94    ///
95    /// Use when you have an estimate of how many rows will match.
96    pub fn from_bitmap_with_capacity(bitmap: &[bool], capacity_hint: usize) -> Self {
97        let mut indices = Vec::with_capacity(capacity_hint);
98
99        for (i, &selected) in bitmap.iter().enumerate() {
100            if selected {
101                indices.push(i as u32);
102            }
103        }
104
105        Self { indices }
106    }
107
108    /// Number of selected rows
109    #[inline]
110    pub fn len(&self) -> usize {
111        self.indices.len()
112    }
113
114    /// Check if no rows are selected
115    #[inline]
116    pub fn is_empty(&self) -> bool {
117        self.indices.is_empty()
118    }
119
120    /// Get the underlying indices slice
121    #[inline]
122    pub fn indices(&self) -> &[u32] {
123        &self.indices
124    }
125
126    /// Consume and return the indices vector
127    #[inline]
128    pub fn into_indices(self) -> Vec<u32> {
129        self.indices
130    }
131
132    /// Get index at position
133    #[inline]
134    pub fn get(&self, pos: usize) -> Option<u32> {
135        self.indices.get(pos).copied()
136    }
137
138    /// Iterate over selected indices
139    #[inline]
140    pub fn iter(&self) -> impl Iterator<Item = u32> + '_ {
141        self.indices.iter().copied()
142    }
143
144    /// Intersect with another selection vector (AND operation)
145    ///
146    /// Returns indices that appear in both selections. Useful for
147    /// combining multiple filter conditions.
148    pub fn intersect(&self, other: &SelectionVector) -> SelectionVector {
149        // Both are sorted, so we can do a merge-style intersection
150        let mut result = Vec::with_capacity(self.len().min(other.len()));
151        let mut i = 0;
152        let mut j = 0;
153
154        while i < self.indices.len() && j < other.indices.len() {
155            match self.indices[i].cmp(&other.indices[j]) {
156                std::cmp::Ordering::Less => i += 1,
157                std::cmp::Ordering::Greater => j += 1,
158                std::cmp::Ordering::Equal => {
159                    result.push(self.indices[i]);
160                    i += 1;
161                    j += 1;
162                }
163            }
164        }
165
166        SelectionVector { indices: result }
167    }
168
169    /// Union with another selection vector (OR operation)
170    ///
171    /// Returns indices that appear in either selection.
172    pub fn union(&self, other: &SelectionVector) -> SelectionVector {
173        // Both are sorted, so we can do a merge-style union
174        let mut result = Vec::with_capacity(self.len() + other.len());
175        let mut i = 0;
176        let mut j = 0;
177
178        while i < self.indices.len() && j < other.indices.len() {
179            match self.indices[i].cmp(&other.indices[j]) {
180                std::cmp::Ordering::Less => {
181                    result.push(self.indices[i]);
182                    i += 1;
183                }
184                std::cmp::Ordering::Greater => {
185                    result.push(other.indices[j]);
186                    j += 1;
187                }
188                std::cmp::Ordering::Equal => {
189                    result.push(self.indices[i]);
190                    i += 1;
191                    j += 1;
192                }
193            }
194        }
195
196        // Append remaining
197        result.extend_from_slice(&self.indices[i..]);
198        result.extend_from_slice(&other.indices[j..]);
199
200        SelectionVector { indices: result }
201    }
202
203    /// Remap indices through another selection
204    ///
205    /// If `self` contains indices [0, 2, 3] and `base` contains [10, 20, 30, 40, 50],
206    /// returns [10, 30, 40] (indices 0, 2, 3 from base).
207    ///
208    /// This is used when chaining operations: if we filter a filtered result,
209    /// we need to map back to original row indices.
210    pub fn remap(&self, base: &SelectionVector) -> SelectionVector {
211        let remapped: Vec<u32> = self
212            .indices
213            .iter()
214            .filter_map(|&idx| base.get(idx as usize))
215            .collect();
216
217        SelectionVector { indices: remapped }
218    }
219
220    /// Create a dense bitmap representation
221    ///
222    /// Returns a boolean vector where `result[i] = true` iff `i` is in the selection.
223    /// `total_count` is the total number of rows in the source.
224    pub fn to_bitmap(&self, total_count: usize) -> Vec<bool> {
225        let mut bitmap = vec![false; total_count];
226        for &idx in &self.indices {
227            if (idx as usize) < total_count {
228                bitmap[idx as usize] = true;
229            }
230        }
231        bitmap
232    }
233
234    /// Filter this selection based on a predicate applied to a slice of data
235    ///
236    /// This is useful for applying additional filters without materializing rows.
237    pub fn filter<T, F>(&self, data: &[T], predicate: F) -> SelectionVector
238    where
239        F: Fn(&T) -> bool,
240    {
241        let filtered: Vec<u32> = self
242            .indices
243            .iter()
244            .filter(|&&idx| predicate(&data[idx as usize]))
245            .copied()
246            .collect();
247
248        SelectionVector { indices: filtered }
249    }
250
251    /// Apply a function to each selected index, collecting results
252    pub fn map<T, F>(&self, mut f: F) -> Vec<T>
253    where
254        F: FnMut(u32) -> T,
255    {
256        self.indices.iter().map(|&idx| f(idx)).collect()
257    }
258
259    /// Selectivity ratio (selected / total)
260    ///
261    /// Returns a value between 0.0 and 1.0 indicating what fraction
262    /// of the total rows are selected.
263    #[inline]
264    pub fn selectivity(&self, total_count: usize) -> f64 {
265        if total_count == 0 {
266            0.0
267        } else {
268            self.len() as f64 / total_count as f64
269        }
270    }
271
272    /// Compact representation check
273    ///
274    /// Returns true if this selection represents a contiguous range,
275    /// which allows for more efficient processing.
276    pub fn is_contiguous(&self) -> bool {
277        if self.indices.len() <= 1 {
278            return true;
279        }
280
281        let first = self.indices[0];
282        let last = self.indices[self.indices.len() - 1];
283        (last - first + 1) as usize == self.indices.len()
284    }
285
286    /// Get the range if this selection is contiguous
287    pub fn as_range(&self) -> Option<Range<usize>> {
288        if self.is_contiguous() && !self.indices.is_empty() {
289            Some(self.indices[0] as usize..self.indices[self.indices.len() - 1] as usize + 1)
290        } else {
291            None
292        }
293    }
294}
295
296impl Default for SelectionVector {
297    fn default() -> Self {
298        Self::empty()
299    }
300}
301
302impl From<Vec<u32>> for SelectionVector {
303    fn from(indices: Vec<u32>) -> Self {
304        Self::from_indices(indices)
305    }
306}
307
308impl From<Vec<usize>> for SelectionVector {
309    fn from(indices: Vec<usize>) -> Self {
310        Self::from_indices(indices.into_iter().map(|i| i as u32).collect())
311    }
312}
313
314impl IntoIterator for SelectionVector {
315    type Item = u32;
316    type IntoIter = std::vec::IntoIter<u32>;
317
318    fn into_iter(self) -> Self::IntoIter {
319        self.indices.into_iter()
320    }
321}
322
323impl<'a> IntoIterator for &'a SelectionVector {
324    type Item = &'a u32;
325    type IntoIter = std::slice::Iter<'a, u32>;
326
327    fn into_iter(self) -> Self::IntoIter {
328        self.indices.iter()
329    }
330}
331
332#[cfg(test)]
333mod selection_tests {
334    use super::*;
335
336    #[test]
337    fn test_from_bitmap() {
338        let bitmap = vec![true, false, true, true, false, true];
339        let selection = SelectionVector::from_bitmap(&bitmap);
340
341        assert_eq!(selection.len(), 4);
342        assert_eq!(selection.indices(), &[0, 2, 3, 5]);
343    }
344
345    #[test]
346    fn test_empty() {
347        let selection = SelectionVector::empty();
348        assert!(selection.is_empty());
349        assert_eq!(selection.len(), 0);
350    }
351
352    #[test]
353    fn test_all() {
354        let selection = SelectionVector::all(5);
355        assert_eq!(selection.len(), 5);
356        assert_eq!(selection.indices(), &[0, 1, 2, 3, 4]);
357    }
358
359    #[test]
360    fn test_intersect() {
361        let a = SelectionVector::from_indices(vec![1, 2, 4, 6, 8]);
362        let b = SelectionVector::from_indices(vec![2, 3, 4, 5, 6]);
363
364        let result = a.intersect(&b);
365        assert_eq!(result.indices(), &[2, 4, 6]);
366    }
367
368    #[test]
369    fn test_union() {
370        let a = SelectionVector::from_indices(vec![1, 3, 5]);
371        let b = SelectionVector::from_indices(vec![2, 3, 4]);
372
373        let result = a.union(&b);
374        assert_eq!(result.indices(), &[1, 2, 3, 4, 5]);
375    }
376
377    #[test]
378    fn test_remap() {
379        let base = SelectionVector::from_indices(vec![10, 20, 30, 40, 50]);
380        let child = SelectionVector::from_indices(vec![0, 2, 4]); // Select indices 0, 2, 4 from base
381
382        let remapped = child.remap(&base);
383        assert_eq!(remapped.indices(), &[10, 30, 50]);
384    }
385
386    #[test]
387    fn test_to_bitmap() {
388        let selection = SelectionVector::from_indices(vec![1, 3, 4]);
389        let bitmap = selection.to_bitmap(6);
390
391        assert_eq!(bitmap, vec![false, true, false, true, true, false]);
392    }
393
394    #[test]
395    fn test_is_contiguous() {
396        let contiguous = SelectionVector::from_indices(vec![2, 3, 4, 5]);
397        let non_contiguous = SelectionVector::from_indices(vec![2, 4, 6]);
398
399        assert!(contiguous.is_contiguous());
400        assert!(!non_contiguous.is_contiguous());
401    }
402
403    #[test]
404    fn test_as_range() {
405        let contiguous = SelectionVector::from_indices(vec![2, 3, 4, 5]);
406        assert_eq!(contiguous.as_range(), Some(2..6));
407
408        let non_contiguous = SelectionVector::from_indices(vec![2, 4, 6]);
409        assert_eq!(non_contiguous.as_range(), None);
410    }
411
412    #[test]
413    fn test_selectivity() {
414        let selection = SelectionVector::from_indices(vec![0, 5, 10]);
415        assert!((selection.selectivity(100) - 0.03).abs() < 0.001);
416        assert!((selection.selectivity(30) - 0.1).abs() < 0.001);
417    }
418
419    #[test]
420    fn test_filter() {
421        let data = vec![10, 20, 30, 40, 50];
422        let selection = SelectionVector::from_indices(vec![0, 2, 4]);
423
424        let filtered = selection.filter(&data, |&x| x > 20);
425        assert_eq!(filtered.indices(), &[2, 4]); // indices where data > 20
426    }
427}