vortex_layout/scan/
selection.rs

1use std::ops::{Not, Range};
2
3use vortex_buffer::Buffer;
4use vortex_error::VortexExpect;
5use vortex_mask::Mask;
6
7use crate::scan::row_mask::RowMask;
8
9/// A selection identifies a set of rows to include in the scan (in addition to applying any
10/// filter predicates).
11#[derive(Default)]
12pub enum Selection {
13    /// No selection, all rows are included.
14    #[default]
15    All,
16    /// A selection of rows to include by index.
17    IncludeByIndex(Buffer<u64>),
18    /// A selection of rows to exclude by index.
19    ExcludeByIndex(Buffer<u64>),
20    /// A selection of rows to include using a [`roaring::RoaringTreemap`].
21    #[cfg(feature = "roaring")]
22    IncludeRoaring(roaring::RoaringTreemap),
23    /// A selection of rows to exclude using a [`roaring::RoaringTreemap`].
24    #[cfg(feature = "roaring")]
25    ExcludeRoaring(roaring::RoaringTreemap),
26}
27
28impl Selection {
29    /// Extract the [`RowMask`] for the given range from this selection.
30    pub(crate) fn row_mask(&self, range: &Range<u64>) -> RowMask {
31        let range_len = usize::try_from(range.end - range.start)
32            .vortex_expect("Range length does not fit into a usize");
33
34        match self {
35            Selection::All => RowMask::new(range.start, Mask::new_true(range_len)),
36            Selection::IncludeByIndex(include) => {
37                let mask = indices_range(range, include)
38                    .map(|idx_range| {
39                        Mask::from_indices(
40                            range_len,
41                            include
42                                .slice(idx_range)
43                                .iter()
44                                .map(|idx| *idx - range.start)
45                                .map(|idx| {
46                                    usize::try_from(idx)
47                                        .vortex_expect("Index does not fit into a usize")
48                                })
49                                .collect(),
50                        )
51                    })
52                    .unwrap_or_else(|| Mask::new_false(range_len));
53
54                RowMask::new(range.start, mask)
55            }
56            Selection::ExcludeByIndex(exclude) => {
57                let mask = Selection::IncludeByIndex(exclude.clone())
58                    .row_mask(range)
59                    .mask()
60                    .clone();
61                RowMask::new(range.start, mask.not())
62            }
63            #[cfg(feature = "roaring")]
64            Selection::IncludeRoaring(roaring) => {
65                use std::ops::BitAnd;
66
67                // First we perform a cheap is_disjoint check
68                let mut range_treemap = roaring::RoaringTreemap::new();
69                range_treemap.insert_range(range.clone());
70
71                if roaring.is_disjoint(&range_treemap) {
72                    return RowMask::new(range.start, Mask::new_false(range_len));
73                }
74
75                // Otherwise, intersect with the selected range and shift to relativize.
76                let roaring = roaring.bitand(range_treemap);
77                let mask = Mask::from_indices(
78                    range_len,
79                    roaring
80                        .iter()
81                        .map(|idx| idx - range.start)
82                        .map(|idx| {
83                            usize::try_from(idx).vortex_expect("Index does not fit into a usize")
84                        })
85                        .collect(),
86                );
87
88                RowMask::new(range.start, mask)
89            }
90            #[cfg(feature = "roaring")]
91            Selection::ExcludeRoaring(roaring) => {
92                use std::ops::BitAnd;
93
94                let mut range_treemap = roaring::RoaringTreemap::new();
95                range_treemap.insert_range(range.clone());
96
97                // If there are no deletions in the intersection, then we have an all true mask.
98                if roaring.intersection_len(&range_treemap) == range_len as u64 {
99                    return RowMask::new(range.start, Mask::new_true(range_len));
100                }
101
102                // Otherwise, intersect with the selected range and shift to relativize.
103                let roaring = roaring.bitand(range_treemap);
104                let mask = Mask::from_excluded_indices(
105                    range_len,
106                    roaring.iter().map(|idx| idx - range.start).map(|idx| {
107                        usize::try_from(idx).vortex_expect("Index does not fit into a usize")
108                    }),
109                );
110
111                RowMask::new(range.start, mask)
112            }
113        }
114    }
115}
116
117/// Find the positional range within row_indices that covers all rows in the given range.
118fn indices_range(range: &Range<u64>, row_indices: &[u64]) -> Option<Range<usize>> {
119    if row_indices.first().is_some_and(|&first| first >= range.end)
120        || row_indices.last().is_some_and(|&last| range.start >= last)
121    {
122        return None;
123    }
124
125    // For the given row range, find the indices that are within the row_indices.
126    let start_idx = row_indices
127        .binary_search(&range.start)
128        .unwrap_or_else(|x| x);
129    let end_idx = row_indices.binary_search(&range.end).unwrap_or_else(|x| x);
130
131    (start_idx != end_idx).then_some(start_idx..end_idx)
132}