Skip to main content

lance_select/mask/
nullable.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use lance_core::deepsize::DeepSizeOf;
5
6use super::{RowAddrMask, RowAddrTreeMap, RowSetOps};
7
8/// A set of row ids, with optional set of nulls.
9///
10/// This is often a result of a filter, where `selected` represents the rows that
11/// passed the filter, and `nulls` represents the rows where the filter evaluated
12/// to null. For example, in SQL `NULL > 5` evaluates to null. This is distinct
13/// from being deselected to support proper three-valued logic for NOT.
14/// (`NOT FALSE` is TRUE, `NOT TRUE` is FALSE, but `NOT NULL` is NULL.
15/// `NULL | TRUE = TRUE`, `NULL & FALSE = FALSE`, but `NULL | FALSE = NULL`
16/// and `NULL & TRUE = NULL`).
17#[derive(Clone, Debug, Default, DeepSizeOf)]
18pub struct NullableRowAddrSet {
19    selected: RowAddrTreeMap,
20    // Rows that are NULL. These rows are considered NULL even if they are also in `selected`.
21    nulls: RowAddrTreeMap,
22}
23
24impl NullableRowAddrSet {
25    /// Create a new RowSelection from selected rows and null rows.
26    ///
27    /// `nulls` may have overlap with `selected`. Rows in `nulls` are considered NULL,
28    /// even if they are also in `selected`.
29    pub fn new(selected: RowAddrTreeMap, nulls: RowAddrTreeMap) -> Self {
30        Self { selected, nulls }
31    }
32
33    pub fn with_nulls(mut self, nulls: RowAddrTreeMap) -> Self {
34        self.nulls = nulls;
35        self
36    }
37
38    /// Create an empty selection. Alias for [Default::default]
39    pub fn empty() -> Self {
40        Default::default()
41    }
42
43    /// Get the number of TRUE rows (selected but not null).
44    ///
45    /// Returns None if the number of TRUE rows cannot be determined. This happens
46    /// if the underlying RowAddrTreeMap has full fragments selected.
47    pub fn len(&self) -> Option<u64> {
48        self.true_rows().len()
49    }
50
51    pub fn is_empty(&self) -> bool {
52        self.selected.is_empty()
53    }
54
55    /// Check if a row_id is selected (TRUE)
56    pub fn selected(&self, row_id: u64) -> bool {
57        self.selected.contains(row_id) && !self.nulls.contains(row_id)
58    }
59
60    /// Get the null rows
61    pub fn null_rows(&self) -> &RowAddrTreeMap {
62        &self.nulls
63    }
64
65    /// Get the raw `selected` bitmap.
66    ///
67    /// This is the backing field, **not** a semantic "TRUE ∪ NULL" set: a NULL
68    /// row may be stored only in `nulls` without appearing in `selected`. Use
69    /// this when you want a zero-copy view of the raw representation (e.g.
70    /// wire serialization that sends `selected` and `nulls` as separate sets).
71    /// For "TRUE rows only", use [`Self::true_rows`].
72    pub fn selected_rows(&self) -> &RowAddrTreeMap {
73        &self.selected
74    }
75
76    /// Get the TRUE rows (selected but not null)
77    pub fn true_rows(&self) -> RowAddrTreeMap {
78        self.selected.clone() - self.nulls.clone()
79    }
80
81    pub fn union_all(selections: &[Self]) -> Self {
82        let selected = RowAddrTreeMap::union_all(
83            &selections
84                .iter()
85                .map(|s| &s.selected)
86                .collect::<Vec<&RowAddrTreeMap>>(),
87        );
88        let nulls = RowAddrTreeMap::union_all(
89            &selections
90                .iter()
91                .map(|s| &s.nulls)
92                .collect::<Vec<&RowAddrTreeMap>>(),
93        );
94        // TRUE | NULL = TRUE, so remove any TRUE rows from nulls.
95        // A row is TRUE in some input iff it's in that input's (selected - nulls).
96        let any_true = selections
97            .iter()
98            .map(|s| s.selected.clone() - &s.nulls)
99            .fold(RowAddrTreeMap::new(), |acc, t| acc | t);
100        let nulls = nulls - &any_true;
101        Self { selected, nulls }
102    }
103}
104
105impl PartialEq for NullableRowAddrSet {
106    fn eq(&self, other: &Self) -> bool {
107        // Semantic equality: two sets are equal iff they decode to the same
108        // Kleene state on every row. Comparing raw `selected` would be wrong
109        // because a NULL row can be represented either inside or outside the
110        // `selected` bitmap.
111        self.true_rows() == other.true_rows() && self.nulls == other.nulls
112    }
113}
114
115impl std::ops::BitAndAssign<&Self> for NullableRowAddrSet {
116    fn bitand_assign(&mut self, rhs: &Self) {
117        self.nulls = if self.nulls.is_empty() && rhs.nulls.is_empty() {
118            RowAddrTreeMap::new() // Fast path
119        } else {
120            (self.nulls.clone() & &rhs.nulls) // null and null -> null
121            | (self.nulls.clone() & &rhs.selected) // null and true -> null
122            | (rhs.nulls.clone() & &self.selected) // true and null -> null
123        };
124
125        self.selected &= &rhs.selected;
126    }
127}
128
129impl std::ops::BitOrAssign<&Self> for NullableRowAddrSet {
130    fn bitor_assign(&mut self, rhs: &Self) {
131        self.nulls = if self.nulls.is_empty() && rhs.nulls.is_empty() {
132            RowAddrTreeMap::new() // Fast path
133        } else {
134            // null or null -> null (excluding rows that are true in either)
135            let true_rows =
136                (self.selected.clone() - &self.nulls) | (rhs.selected.clone() - &rhs.nulls);
137            (self.nulls.clone() | &rhs.nulls) - true_rows
138        };
139
140        self.selected |= &rhs.selected;
141    }
142}
143
144/// A version of [`RowAddrMask`] that supports nulls.
145///
146/// This mask handles three-valued logic for SQL expressions, where a filter can
147/// evaluate to TRUE, FALSE, or NULL. The `selected` set includes rows that are
148/// TRUE or NULL. The `nulls` set includes rows that are NULL.
149#[derive(Clone, Debug, PartialEq)]
150pub enum NullableRowAddrMask {
151    AllowList(NullableRowAddrSet),
152    BlockList(NullableRowAddrSet),
153}
154
155impl NullableRowAddrMask {
156    /// A mask that matches no rows (TRUE = ∅, NULL = ∅).
157    pub fn allow_nothing() -> Self {
158        Self::AllowList(NullableRowAddrSet::empty())
159    }
160
161    /// A mask that matches every row (TRUE = universe, NULL = ∅).
162    pub fn all_rows() -> Self {
163        Self::BlockList(NullableRowAddrSet::empty())
164    }
165
166    pub fn selected(&self, row_id: u64) -> bool {
167        match self {
168            Self::AllowList(NullableRowAddrSet { selected, nulls }) => {
169                selected.contains(row_id) && !nulls.contains(row_id)
170            }
171            Self::BlockList(NullableRowAddrSet { selected, nulls }) => {
172                !selected.contains(row_id) && !nulls.contains(row_id)
173            }
174        }
175    }
176
177    pub fn drop_nulls(self) -> RowAddrMask {
178        match self {
179            Self::AllowList(NullableRowAddrSet { selected, nulls }) => {
180                RowAddrMask::AllowList(selected - nulls)
181            }
182            Self::BlockList(NullableRowAddrSet { selected, nulls }) => {
183                RowAddrMask::BlockList(selected | nulls)
184            }
185        }
186    }
187}
188
189impl std::ops::Not for NullableRowAddrMask {
190    type Output = Self;
191
192    fn not(self) -> Self::Output {
193        match self {
194            Self::AllowList(set) => Self::BlockList(set),
195            Self::BlockList(set) => Self::AllowList(set),
196        }
197    }
198}
199
200impl std::ops::BitAnd for NullableRowAddrMask {
201    type Output = Self;
202
203    fn bitand(self, rhs: Self) -> Self::Output {
204        // Null handling:
205        // * null and true -> null
206        // * null and null -> null
207        // * null and false -> false
208        match (self, rhs) {
209            (Self::AllowList(a), Self::AllowList(b)) => {
210                let nulls = if a.nulls.is_empty() && b.nulls.is_empty() {
211                    RowAddrTreeMap::new() // Fast path
212                } else {
213                    (a.nulls.clone() & &b.nulls) // null and null -> null
214                    | (a.nulls & &b.selected) // null and true -> null
215                    | (b.nulls & &a.selected) // true and null -> null
216                };
217                let selected = a.selected & b.selected;
218                Self::AllowList(NullableRowAddrSet { selected, nulls })
219            }
220            (Self::AllowList(allow), Self::BlockList(block))
221            | (Self::BlockList(block), Self::AllowList(allow)) => {
222                let nulls = if allow.nulls.is_empty() && block.nulls.is_empty() {
223                    RowAddrTreeMap::new() // Fast path
224                } else {
225                    (allow.nulls.clone() & &block.nulls) // null and null -> null
226                    | (allow.nulls - &block.selected) // null and true -> null
227                    | (block.nulls & &allow.selected) // true and null -> null
228                };
229                let selected = allow.selected - block.selected;
230                Self::AllowList(NullableRowAddrSet { selected, nulls })
231            }
232            (Self::BlockList(a), Self::BlockList(b)) => {
233                let nulls = if a.nulls.is_empty() && b.nulls.is_empty() {
234                    RowAddrTreeMap::new() // Fast path
235                } else {
236                    (a.nulls.clone() & &b.nulls) // null and null -> null
237                    | (a.nulls - &b.selected) // null and true -> null
238                    | (b.nulls - &a.selected) // true and null -> null
239                };
240                let selected = a.selected | b.selected;
241                Self::BlockList(NullableRowAddrSet { selected, nulls })
242            }
243        }
244    }
245}
246
247impl std::ops::BitOr for NullableRowAddrMask {
248    type Output = Self;
249
250    fn bitor(self, rhs: Self) -> Self::Output {
251        // Null handling:
252        // * null or true -> true
253        // * null or null -> null
254        // * null or false -> null
255        match (self, rhs) {
256            (Self::AllowList(a), Self::AllowList(b)) => {
257                let nulls = if a.nulls.is_empty() && b.nulls.is_empty() {
258                    RowAddrTreeMap::new() // Fast path
259                } else {
260                    // null or null -> null (excluding rows that are true in either)
261                    let true_rows =
262                        (a.selected.clone() - &a.nulls) | (b.selected.clone() - &b.nulls);
263                    (a.nulls | b.nulls) - true_rows
264                };
265                let selected = (a.selected | b.selected) | &nulls;
266                Self::AllowList(NullableRowAddrSet { selected, nulls })
267            }
268            (Self::AllowList(allow), Self::BlockList(block))
269            | (Self::BlockList(block), Self::AllowList(allow)) => {
270                let allow_true = allow.selected.clone() - &allow.nulls;
271                let block_false = block.selected.clone() - &block.nulls;
272
273                let nulls = if allow.nulls.is_empty() && block.nulls.is_empty() {
274                    RowAddrTreeMap::new() // Fast path
275                } else {
276                    // NULL|FALSE=NULL, FALSE|NULL=NULL, NULL|NULL=NULL, TRUE|NULL=TRUE.
277                    // So NULL rows are: (allow NULL & block FALSE) or (block NULL & allow not TRUE).
278                    (allow.nulls & &block_false) | (block.nulls - &allow_true)
279                };
280                let selected = (block_false - &allow_true) | &nulls;
281                Self::BlockList(NullableRowAddrSet { selected, nulls })
282            }
283            (Self::BlockList(a), Self::BlockList(b)) => {
284                let a_false = a.selected.clone() - &a.nulls;
285                let b_false = b.selected.clone() - &b.nulls;
286                let nulls = if a.nulls.is_empty() && b.nulls.is_empty() {
287                    RowAddrTreeMap::new() // Fast path
288                } else {
289                    // NULL if: (A NULL & B FALSE) or (A FALSE & B NULL) or (A NULL & B NULL).
290                    (a.nulls.clone() & &b_false)
291                        | (b.nulls.clone() & &a_false)
292                        | (a.nulls & &b.nulls)
293                };
294                let selected = (a_false & b_false) | &nulls;
295                Self::BlockList(NullableRowAddrSet { selected, nulls })
296            }
297        }
298    }
299}
300
301#[cfg(test)]
302mod tests {
303    use super::*;
304
305    fn rows(ids: &[u64]) -> RowAddrTreeMap {
306        RowAddrTreeMap::from_iter(ids)
307    }
308
309    fn nullable_set(selected: &[u64], nulls: &[u64]) -> NullableRowAddrSet {
310        NullableRowAddrSet::new(rows(selected), rows(nulls))
311    }
312
313    fn allow(selected: &[u64], nulls: &[u64]) -> NullableRowAddrMask {
314        NullableRowAddrMask::AllowList(nullable_set(selected, nulls))
315    }
316
317    fn block(selected: &[u64], nulls: &[u64]) -> NullableRowAddrMask {
318        NullableRowAddrMask::BlockList(nullable_set(selected, nulls))
319    }
320
321    fn assert_mask_selects(mask: &NullableRowAddrMask, selected: &[u64], not_selected: &[u64]) {
322        for &id in selected {
323            assert!(mask.selected(id), "Expected row {} to be selected", id);
324        }
325        for &id in not_selected {
326            assert!(!mask.selected(id), "Expected row {} to NOT be selected", id);
327        }
328    }
329
330    #[test]
331    fn test_not_with_nulls() {
332        // Test case from issue #4756: x != 5 on data [0, 5, null]
333        // x = 5 should return: AllowList with selected=[1,2], nulls=[2]
334        // NOT(x = 5) should return: BlockList with selected=[1,2], nulls=[2]
335        // selected() should return TRUE for row 0, FALSE for rows 1 and 2
336        let mask = allow(&[1, 2], &[2]);
337        let not_mask = !mask;
338
339        // Row 0: selected (x=0, which is != 5)
340        // Row 1: NOT selected (x=5, which is == 5)
341        // Row 2: NOT selected (x=null, comparison result is null)
342        assert_mask_selects(&not_mask, &[0], &[1, 2]);
343    }
344
345    #[test]
346    fn test_and_with_nulls() {
347        // Test Kleene AND logic: true AND null = null, false AND null = false
348
349        // Case 1: TRUE mask AND mask with nulls
350        let true_mask = allow(&[0, 1, 2, 3, 4], &[]);
351        let null_mask = allow(&[0, 1, 2, 3, 4], &[1, 3]);
352        let result = true_mask & null_mask.clone();
353
354        // TRUE AND TRUE = TRUE; TRUE AND NULL = NULL (filtered out)
355        assert_mask_selects(&result, &[0, 2, 4], &[1, 3]);
356
357        // Case 2: FALSE mask AND mask with nulls
358        let false_mask = block(&[0, 1, 2, 3, 4], &[]);
359        let result = false_mask & null_mask;
360
361        // FALSE AND anything = FALSE
362        assert_mask_selects(&result, &[], &[0, 1, 2, 3, 4]);
363
364        // Case 3: Both masks have nulls - union of null sets
365        let mask1 = allow(&[0, 1, 2], &[1]);
366        let mask2 = allow(&[0, 2, 3], &[2]);
367        let result = mask1 & mask2;
368
369        // Only row 0 is TRUE in both; rows 1,2 are null in at least one; row 3 not in first
370        assert_mask_selects(&result, &[0], &[1, 2, 3]);
371    }
372
373    #[test]
374    fn test_or_with_nulls() {
375        // Test Kleene OR logic: true OR null = true, false OR null = null
376
377        // Case 1: FALSE mask OR mask with nulls
378        let false_mask = block(&[0, 1, 2], &[]);
379        let null_mask = allow(&[0, 1, 2], &[1, 2]);
380        let result = false_mask | null_mask.clone();
381
382        // FALSE OR TRUE = TRUE; FALSE OR NULL = NULL (filtered out)
383        assert_mask_selects(&result, &[0], &[1, 2]);
384
385        // Case 2: TRUE mask OR mask with nulls
386        let true_mask = allow(&[0, 1, 2], &[]);
387        let result = true_mask | null_mask;
388
389        // TRUE OR anything = TRUE
390        assert_mask_selects(&result, &[0, 1, 2], &[]);
391
392        // Case 3: Both have nulls
393        let mask1 = block(&[0, 1, 2, 3], &[1, 2]);
394        let mask2 = block(&[0, 1, 2, 3], &[2, 3]);
395        let result = mask1 | mask2;
396
397        // Row 0: FALSE in both; Rows 1,2,3: NULL in at least one
398        assert_mask_selects(&result, &[], &[0, 1, 2, 3]);
399    }
400
401    #[test]
402    fn test_or_allow_block_keeps_block_nulls() {
403        // Allow|Block OR must preserve NULLs from block even when block.selected is empty.
404        // allow: TRUE=[1], NULL=[0]; block: FALSE=[], NULL=[0]
405        let allow_mask = allow(&[1], &[0]);
406        let block_mask = block(&[], &[0]);
407        let result = allow_mask | block_mask;
408
409        // Row 1 is TRUE; row 0 remains NULL (not selected)
410        assert_mask_selects(&result, &[1], &[0]);
411    }
412
413    #[test]
414    fn test_or_allow_block_keeps_block_nulls_with_false_rows() {
415        // Ensure FALSE stays FALSE and NULL stays NULL when both appear on the block side.
416        // allow: TRUE=[2], NULL=[]; block: FALSE=[1], NULL=[0]
417        let allow_mask = allow(&[2], &[]);
418        let block_mask = block(&[1], &[0]);
419        let result = allow_mask | block_mask;
420
421        // Row 2 is TRUE; row 1 is FALSE; row 0 remains NULL (not selected)
422        assert_mask_selects(&result, &[2], &[0, 1]);
423    }
424
425    #[test]
426    fn test_or_block_block_true_overrides_null() {
427        // TRUE OR NULL should be TRUE, even when both sides are BlockList.
428        let true_mask = block(&[], &[]);
429        let null_mask = block(&[], &[0]);
430        let result = true_mask | null_mask;
431
432        // Row 0 should be TRUE.
433        assert_mask_selects(&result, &[0], &[]);
434    }
435
436    #[test]
437    fn test_row_selection_bit_or() {
438        // [T, N, T, N, F, F, F]
439        let left = nullable_set(&[1, 2, 3, 4], &[2, 4]);
440        // [F, F, T, N, T, N, N]
441        let right = nullable_set(&[3, 4, 5, 6], &[4, 6, 7]);
442        // [T, N, T, N, T, N, N]
443        let expected_true = rows(&[1, 3, 5]);
444        let expected_nulls = rows(&[2, 4, 6, 7]);
445
446        let mut result = left.clone();
447        result |= &right;
448        assert_eq!(&result.true_rows(), &expected_true);
449        assert_eq!(result.null_rows(), &expected_nulls);
450
451        // Commutative property holds
452        let mut result = right.clone();
453        result |= &left;
454        assert_eq!(&result.true_rows(), &expected_true);
455        assert_eq!(result.null_rows(), &expected_nulls);
456    }
457
458    #[test]
459    fn test_row_selection_bit_and() {
460        // [T, N, T, N, F, F, F]
461        let left = nullable_set(&[1, 2, 3, 4], &[2, 4]);
462        // [F, F, T, N, T, N, N]
463        let right = nullable_set(&[3, 4, 5, 6], &[4, 6, 7]);
464        // [F, F, T, N, F, F, F]
465        let expected_true = rows(&[3]);
466        let expected_nulls = rows(&[4]);
467
468        let mut result = left.clone();
469        result &= &right;
470        assert_eq!(&result.true_rows(), &expected_true);
471        assert_eq!(result.null_rows(), &expected_nulls);
472
473        // Commutative property holds
474        let mut result = right.clone();
475        result &= &left;
476        assert_eq!(&result.true_rows(), &expected_true);
477        assert_eq!(result.null_rows(), &expected_nulls);
478    }
479
480    #[test]
481    fn test_union_all() {
482        // Union all is basically a series of ORs.
483        // [T, T, T, N, N, N, F, F, F]
484        let set1 = nullable_set(&[1, 2, 3, 4], &[4, 5, 6]);
485        // [T, N, F, T, N, F, T, N, F]
486        let set2 = nullable_set(&[1, 4, 7, 8], &[2, 5, 8]);
487        let set3 = NullableRowAddrSet::empty();
488
489        let result = NullableRowAddrSet::union_all(&[set1, set2, set3]);
490
491        // [T, T, T, T, N, N, T, N, F]
492        assert_eq!(&result.true_rows(), &rows(&[1, 2, 3, 4, 7]));
493        assert_eq!(result.null_rows(), &rows(&[5, 6, 8]));
494    }
495
496    #[test]
497    fn test_partial_eq_semantic_equivalence() {
498        // Two representations of "row 5 is NULL, nothing is TRUE":
499        //   a: selected={5}, nulls={5}  (NULL row also in selected)
500        //   b: selected={},  nulls={5}  (NULL row only in nulls)
501        // Both decode to the same Kleene state on every row, so they must
502        // compare equal under semantic PartialEq.
503        let a = NullableRowAddrSet::new(rows(&[5]), rows(&[5]));
504        let b = NullableRowAddrSet::new(rows(&[]), rows(&[5]));
505        assert_eq!(a, b);
506        assert_eq!(a.true_rows(), b.true_rows());
507        assert_eq!(a.null_rows(), b.null_rows());
508    }
509
510    #[test]
511    fn test_union_all_true_overrides_null() {
512        // Critical conflict case: row 5 is TRUE in set1 but NULL in set2.
513        // Kleene: TRUE ∨ NULL = TRUE → row 5 must end up TRUE, not NULL.
514        let set1 = nullable_set(&[5], &[]);
515        let set2 = nullable_set(&[5], &[5]);
516
517        let result = NullableRowAddrSet::union_all(&[set1, set2]);
518
519        assert_eq!(result.true_rows(), rows(&[5]));
520        assert!(result.null_rows().is_empty());
521    }
522
523    #[test]
524    fn test_union_all_null_only_input() {
525        // Input where a row is NULL but NOT in `selected` (the type allows
526        // `selected` to be a strict subset of TRUE ∪ NULL).
527        let set1 = NullableRowAddrSet::new(rows(&[]), rows(&[5]));
528        let set2 = NullableRowAddrSet::new(rows(&[1]), rows(&[]));
529
530        let result = NullableRowAddrSet::union_all(&[set1, set2]);
531
532        assert_eq!(result.true_rows(), rows(&[1]));
533        assert_eq!(result.null_rows(), &rows(&[5]));
534    }
535
536    #[test]
537    fn test_union_all_all_null_for_a_row() {
538        // Every input marks row 7 as NULL; nothing makes it TRUE.
539        let set1 = nullable_set(&[7], &[7]);
540        let set2 = nullable_set(&[7], &[7]);
541        let set3 = NullableRowAddrSet::new(rows(&[]), rows(&[7]));
542
543        let result = NullableRowAddrSet::union_all(&[set1, set2, set3]);
544
545        assert!(result.true_rows().is_empty());
546        assert_eq!(result.null_rows(), &rows(&[7]));
547    }
548
549    #[test]
550    fn test_union_all_empty_inputs() {
551        let result = NullableRowAddrSet::union_all(&[]);
552        assert!(result.true_rows().is_empty());
553        assert!(result.null_rows().is_empty());
554    }
555
556    #[test]
557    fn test_union_all_single_input() {
558        // One input → state of every row preserved.
559        let set = nullable_set(&[1, 2, 3, 4], &[2, 4]);
560        let result = NullableRowAddrSet::union_all(std::slice::from_ref(&set));
561
562        assert_eq!(result.true_rows(), rows(&[1, 3]));
563        assert_eq!(result.null_rows(), &rows(&[2, 4]));
564    }
565
566    #[test]
567    fn test_union_all_all_empty_inputs() {
568        let inputs = [
569            NullableRowAddrSet::empty(),
570            NullableRowAddrSet::empty(),
571            NullableRowAddrSet::empty(),
572        ];
573        let result = NullableRowAddrSet::union_all(&inputs);
574        assert!(result.true_rows().is_empty());
575        assert!(result.null_rows().is_empty());
576    }
577
578    #[test]
579    fn test_union_all_disjoint_inputs() {
580        // No row appears in more than one input.
581        let set1 = nullable_set(&[1, 2], &[2]);
582        let set2 = nullable_set(&[10, 11], &[11]);
583        let set3 = nullable_set(&[20], &[]);
584
585        let result = NullableRowAddrSet::union_all(&[set1, set2, set3]);
586
587        assert_eq!(result.true_rows(), rows(&[1, 10, 20]));
588        assert_eq!(result.null_rows(), &rows(&[2, 11]));
589    }
590
591    #[test]
592    fn test_union_all_three_state_row() {
593        // Same row 42 across three inputs in three different states:
594        //   set1: TRUE   set2: NULL   set3: FALSE
595        // Kleene OR: TRUE ∨ NULL ∨ FALSE = TRUE.
596        let set1 = nullable_set(&[42], &[]);
597        let set2 = nullable_set(&[42], &[42]);
598        let set3 = NullableRowAddrSet::empty();
599
600        let result = NullableRowAddrSet::union_all(&[set1, set2, set3]);
601
602        assert_eq!(result.true_rows(), rows(&[42]));
603        assert!(result.null_rows().is_empty());
604    }
605
606    #[test]
607    fn test_union_all_matches_repeated_bitor() {
608        // union_all(a, b, c) must equal ((a | b) | c) — same Kleene operator,
609        // applied pairwise via the independently-implemented BitOrAssign.
610        let set1 = nullable_set(&[1, 2, 3, 4], &[4, 5, 6]);
611        let set2 = nullable_set(&[1, 4, 7, 8], &[2, 5, 8]);
612        let set3 = nullable_set(&[2, 6, 10], &[6, 10]);
613
614        let via_union_all =
615            NullableRowAddrSet::union_all(&[set1.clone(), set2.clone(), set3.clone()]);
616
617        let mut via_bitor = set1;
618        via_bitor |= &set2;
619        via_bitor |= &set3;
620
621        assert_eq!(via_union_all.true_rows(), via_bitor.true_rows());
622        assert_eq!(via_union_all.null_rows(), via_bitor.null_rows());
623    }
624
625    #[test]
626    fn test_nullable_row_addr_set_with_nulls() {
627        let set = NullableRowAddrSet::new(rows(&[1, 2, 3]), RowAddrTreeMap::new());
628        let set_with_nulls = set.with_nulls(rows(&[2]));
629
630        assert!(set_with_nulls.selected(1) && set_with_nulls.selected(3));
631        assert!(!set_with_nulls.selected(2)); // null
632    }
633
634    #[test]
635    fn test_nullable_row_addr_set_len_and_is_empty() {
636        let set = nullable_set(&[1, 2, 3, 4, 5], &[2, 4]);
637
638        // len() returns count of TRUE rows (selected - nulls)
639        assert_eq!(set.len(), Some(3)); // 1, 3, 5
640        assert!(!set.is_empty());
641
642        let empty_set = NullableRowAddrSet::empty();
643        assert!(empty_set.is_empty());
644        assert_eq!(empty_set.len(), Some(0));
645    }
646
647    #[test]
648    fn test_nullable_row_addr_set_selected() {
649        let set = nullable_set(&[1, 2, 3], &[2]);
650
651        // selected() returns true only for TRUE rows (in selected and not in nulls)
652        assert!(set.selected(1) && set.selected(3));
653        assert!(!set.selected(2)); // null
654        assert!(!set.selected(4)); // not in selected
655    }
656
657    #[test]
658    fn test_nullable_row_addr_set_partial_eq() {
659        let set1 = nullable_set(&[1, 2, 3], &[2]);
660        let set2 = nullable_set(&[1, 2, 3], &[2]);
661        // set3 has same true_rows but different nulls
662        let set3 = nullable_set(&[1, 3], &[3]);
663
664        assert_eq!(set1, set2);
665        assert_ne!(set1, set3); // different nulls
666    }
667
668    #[test]
669    fn test_nullable_row_addr_set_bitand_fast_path() {
670        // Test fast path when both have no nulls
671        let set1 = nullable_set(&[1, 2, 3], &[]);
672        let set2 = nullable_set(&[2, 3, 4], &[]);
673
674        let mut result = set1;
675        result &= &set2;
676
677        // Intersection: [2, 3]
678        assert!(result.selected(2) && result.selected(3));
679        assert!(!result.selected(1) && !result.selected(4));
680        assert!(result.null_rows().is_empty());
681    }
682
683    #[test]
684    fn test_nullable_row_addr_set_bitor_fast_path() {
685        // Test fast path when both have no nulls
686        let set1 = nullable_set(&[1, 2], &[]);
687        let set2 = nullable_set(&[3, 4], &[]);
688
689        let mut result = set1;
690        result |= &set2;
691
692        // Union: [1, 2, 3, 4]
693        for id in [1, 2, 3, 4] {
694            assert!(result.selected(id));
695        }
696        assert!(result.null_rows().is_empty());
697    }
698
699    #[test]
700    fn test_nullable_row_id_mask_drop_nulls() {
701        // Test drop_nulls for AllowList
702        let allow_mask = allow(&[1, 2, 3, 4], &[2, 4]);
703        let dropped = allow_mask.drop_nulls();
704        // Should be AllowList([1, 3]) after removing nulls
705        assert!(dropped.selected(1) && dropped.selected(3));
706        assert!(!dropped.selected(2) && !dropped.selected(4));
707
708        // Test drop_nulls for BlockList
709        let block_mask = block(&[1, 2], &[3]);
710        let dropped = block_mask.drop_nulls();
711        // BlockList: blocked = [1, 2] | [3] = [1, 2, 3]
712        assert!(!dropped.selected(1) && !dropped.selected(2) && !dropped.selected(3));
713        assert!(dropped.selected(4) && dropped.selected(5));
714    }
715
716    #[test]
717    fn test_nullable_row_id_mask_not_blocklist() {
718        let block_mask = block(&[1, 2], &[2]);
719        let not_mask = !block_mask;
720
721        // NOT(BlockList) = AllowList
722        assert!(matches!(not_mask, NullableRowAddrMask::AllowList(_)));
723    }
724
725    #[test]
726    fn test_nullable_row_id_mask_bitand_allow_allow_fast_path() {
727        // Test AllowList & AllowList with no nulls (fast path)
728        let mask1 = allow(&[1, 2, 3], &[]);
729        let mask2 = allow(&[2, 3, 4], &[]);
730
731        let result = mask1 & mask2;
732        assert_mask_selects(&result, &[2, 3], &[1, 4]);
733    }
734
735    #[test]
736    fn test_nullable_row_id_mask_bitand_allow_block() {
737        let allow_mask = allow(&[1, 2, 3, 4, 5], &[2]);
738        let block_mask = block(&[3, 4], &[4]);
739
740        let result = allow_mask & block_mask;
741        // allow: T=[1,3,4,5], N=[2]
742        // block: F=[3,4], N=[4]
743        // T & T = T; N & T = N (filtered); T & F = F; T & N = N (filtered)
744        assert_mask_selects(&result, &[1, 5], &[2, 3, 4]);
745    }
746
747    #[test]
748    fn test_nullable_row_id_mask_bitand_allow_block_fast_path() {
749        // Test AllowList & BlockList fast path (no nulls)
750        let allow_mask = allow(&[1, 2, 3], &[]);
751        let block_mask = block(&[2], &[]);
752
753        let result = allow_mask & block_mask;
754        assert_mask_selects(&result, &[1, 3], &[2]);
755    }
756
757    #[test]
758    fn test_nullable_row_id_mask_bitand_block_block() {
759        let block1 = block(&[1, 2], &[2]);
760        let block2 = block(&[2, 3], &[3]);
761
762        let result = block1 & block2;
763        // block1: F=[1], N=[2]; block2: F=[2], N=[3]
764        // F & T = F; N & F = F; T & N = N (filtered); T & T = T
765        assert_mask_selects(&result, &[4], &[1, 2, 3]);
766    }
767
768    #[test]
769    fn test_nullable_row_id_mask_bitand_block_block_fast_path() {
770        // Test BlockList & BlockList fast path (no nulls)
771        let block1 = block(&[1], &[]);
772        let block2 = block(&[2], &[]);
773
774        let result = block1 & block2;
775        assert_mask_selects(&result, &[3], &[1, 2]);
776    }
777
778    #[test]
779    fn test_nullable_row_id_mask_bitor_allow_allow_fast_path() {
780        // Test AllowList | AllowList with no nulls (fast path)
781        let mask1 = allow(&[1, 2], &[]);
782        let mask2 = allow(&[3, 4], &[]);
783
784        let result = mask1 | mask2;
785        assert_mask_selects(&result, &[1, 2, 3, 4], &[5]);
786    }
787
788    #[test]
789    fn test_nullable_row_id_mask_bitor_allow_block() {
790        let allow_mask = allow(&[1, 2, 3], &[2]);
791        let block_mask = block(&[1, 4], &[4]);
792
793        let result = allow_mask | block_mask;
794        // allow: T=[1,3], N=[2]; block: F=[1], N=[4], T=everything else
795        // T|F=T, T|T=T, N|T=T
796        assert_mask_selects(&result, &[1, 2, 3], &[]);
797    }
798
799    #[test]
800    fn test_nullable_row_id_mask_bitor_allow_block_fast_path() {
801        // Test AllowList | BlockList fast path (no nulls)
802        let allow_mask = allow(&[1], &[]);
803        let block_mask = block(&[2], &[]);
804
805        let result = allow_mask | block_mask;
806        // AllowList([1]) | BlockList([2]) = BlockList([2] - [1]) = BlockList([2])
807        assert_mask_selects(&result, &[1, 3], &[2]);
808    }
809
810    #[test]
811    fn test_nullable_row_id_mask_bitor_block_block_fast_path() {
812        // Test BlockList | BlockList with no nulls (fast path)
813        let block1 = block(&[1, 2], &[]);
814        let block2 = block(&[2, 3], &[]);
815
816        let result = block1 | block2;
817        // OR of BlockLists: BlockList([1,2] & [2,3]) = BlockList([2])
818        assert_mask_selects(&result, &[1, 3, 4], &[2]);
819    }
820}