use crate::BitMask;
const SPARSE_DIVISOR: usize = 1024;
const DENSE_NUM: usize = 3;
const DENSE_DEN: usize = 10;
pub const HYBRID_CHUNK_SIZE: usize = 4096;
const HYBRID_WORDS_PER_CHUNK: usize = HYBRID_CHUNK_SIZE / 64;
const HYBRID_CHUNK_SPARSE_THRESHOLD: usize = HYBRID_CHUNK_SIZE / 32;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HybridChunk {
Empty,
All,
Sparse(Vec<u16>),
Dense(Box<[u64]>),
}
impl HybridChunk {
pub fn count(&self, chunk_len: usize) -> usize {
match self {
HybridChunk::Empty => 0,
HybridChunk::All => chunk_len,
HybridChunk::Sparse(rows) => rows.len(),
HybridChunk::Dense(words) => {
words.iter().map(|w| w.count_ones() as usize).sum()
}
}
}
pub fn contains_local(&self, off: usize, chunk_len: usize) -> bool {
if off >= chunk_len {
return false;
}
match self {
HybridChunk::Empty => false,
HybridChunk::All => true,
HybridChunk::Sparse(rows) => rows.binary_search(&(off as u16)).is_ok(),
HybridChunk::Dense(words) => {
let wi = off / 64;
let bi = off % 64;
(words[wi] >> bi) & 1 == 1
}
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AdaptiveSelection {
Empty { nrows: usize },
All { nrows: usize },
SelectionVector { rows: Vec<u32>, nrows: usize },
VerbatimMask { mask: BitMask },
Hybrid { nrows: usize, chunks: Vec<HybridChunk> },
}
impl AdaptiveSelection {
pub fn all(nrows: usize) -> Self {
AdaptiveSelection::All { nrows }
}
pub fn empty(nrows: usize) -> Self {
AdaptiveSelection::Empty { nrows }
}
pub fn from_bitmask(mask: BitMask) -> Self {
let nrows = mask.nrows();
let count = mask.count_ones();
if count == 0 {
AdaptiveSelection::Empty { nrows }
} else if count == nrows {
AdaptiveSelection::All { nrows }
} else {
AdaptiveSelection::VerbatimMask { mask }
}
}
pub fn from_predicate_result(words: Vec<u64>, nrows: usize) -> Self {
let count: usize = words.iter().map(|w| w.count_ones() as usize).sum();
Self::classify(words, nrows, count)
}
pub fn from_words_with_count(words: Vec<u64>, nrows: usize, count: usize) -> Self {
Self::classify(words, nrows, count)
}
fn classify(words: Vec<u64>, nrows: usize, count: usize) -> Self {
if count == 0 {
return AdaptiveSelection::Empty { nrows };
}
if count == nrows {
return AdaptiveSelection::All { nrows };
}
if Self::is_sparse(count, nrows) {
let rows = words_to_indices(&words);
return AdaptiveSelection::SelectionVector { rows, nrows };
}
let mask = bitmask_from_words(words, nrows);
if Self::is_dense(count, nrows) {
AdaptiveSelection::VerbatimMask { mask }
} else if Self::should_hybrid(nrows) {
let chunks = chunks_from_mask(&mask);
AdaptiveSelection::Hybrid { nrows, chunks }
} else {
AdaptiveSelection::VerbatimMask { mask }
}
}
#[inline]
fn is_sparse(count: usize, nrows: usize) -> bool {
count < nrows / SPARSE_DIVISOR
}
#[inline]
fn is_dense(count: usize, nrows: usize) -> bool {
let lhs = (count as u128) * (DENSE_DEN as u128);
let rhs = (nrows as u128) * (DENSE_NUM as u128);
lhs > rhs
}
#[inline]
fn should_hybrid(nrows: usize) -> bool {
nrows >= 2 * HYBRID_CHUNK_SIZE
}
pub fn nrows(&self) -> usize {
match self {
AdaptiveSelection::Empty { nrows } => *nrows,
AdaptiveSelection::All { nrows } => *nrows,
AdaptiveSelection::SelectionVector { nrows, .. } => *nrows,
AdaptiveSelection::VerbatimMask { mask } => mask.nrows(),
AdaptiveSelection::Hybrid { nrows, .. } => *nrows,
}
}
pub fn count(&self) -> usize {
match self {
AdaptiveSelection::Empty { .. } => 0,
AdaptiveSelection::All { nrows } => *nrows,
AdaptiveSelection::SelectionVector { rows, .. } => rows.len(),
AdaptiveSelection::VerbatimMask { mask } => mask.count_ones(),
AdaptiveSelection::Hybrid { nrows, chunks } => {
let mut total = 0;
for (i, c) in chunks.iter().enumerate() {
total += c.count(chunk_len_for(i, *nrows));
}
total
}
}
}
pub fn contains(&self, row: usize) -> bool {
match self {
AdaptiveSelection::Empty { .. } => false,
AdaptiveSelection::All { nrows } => row < *nrows,
AdaptiveSelection::SelectionVector { rows, nrows } => {
if row >= *nrows {
return false;
}
let target = row as u32;
rows.binary_search(&target).is_ok()
}
AdaptiveSelection::VerbatimMask { mask } => {
if row >= mask.nrows() {
return false;
}
mask.get(row)
}
AdaptiveSelection::Hybrid { nrows, chunks } => {
if row >= *nrows {
return false;
}
let ci = row / HYBRID_CHUNK_SIZE;
let off = row % HYBRID_CHUNK_SIZE;
chunks[ci].contains_local(off, chunk_len_for(ci, *nrows))
}
}
}
pub fn iter_indices(&self) -> SelectionIndices<'_> {
match self {
AdaptiveSelection::Empty { .. } => SelectionIndices::Empty,
AdaptiveSelection::All { nrows } => SelectionIndices::Range(0..*nrows),
AdaptiveSelection::SelectionVector { rows, .. } => SelectionIndices::Vec(rows.iter()),
AdaptiveSelection::VerbatimMask { mask } => SelectionIndices::Mask {
mask,
next: 0,
nrows: mask.nrows(),
},
AdaptiveSelection::Hybrid { nrows, chunks } => SelectionIndices::Hybrid {
chunks,
nrows: *nrows,
ci: 0,
inner: HybridInner::Start,
},
}
}
pub fn materialize_mask(&self) -> BitMask {
match self {
AdaptiveSelection::Empty { nrows } => BitMask::all_false(*nrows),
AdaptiveSelection::All { nrows } => BitMask::all_true(*nrows),
AdaptiveSelection::SelectionVector { rows, nrows } => {
let mut bools = vec![false; *nrows];
for &r in rows {
bools[r as usize] = true;
}
BitMask::from_bools(&bools)
}
AdaptiveSelection::VerbatimMask { mask } => mask.clone(),
AdaptiveSelection::Hybrid { nrows, chunks } => {
let mut bools = vec![false; *nrows];
for (ci, chunk) in chunks.iter().enumerate() {
let base = ci * HYBRID_CHUNK_SIZE;
let chunk_len = chunk_len_for(ci, *nrows);
match chunk {
HybridChunk::Empty => {}
HybridChunk::All => {
for off in 0..chunk_len {
bools[base + off] = true;
}
}
HybridChunk::Sparse(rows) => {
for &r in rows {
bools[base + r as usize] = true;
}
}
HybridChunk::Dense(words) => {
for off in 0..chunk_len {
let wi = off / 64;
let bi = off % 64;
if (words[wi] >> bi) & 1 == 1 {
bools[base + off] = true;
}
}
}
}
}
BitMask::from_bools(&bools)
}
}
}
pub fn materialize_indices(&self) -> Vec<u32> {
match self {
AdaptiveSelection::Empty { .. } => Vec::new(),
AdaptiveSelection::All { nrows } => (0..*nrows as u32).collect(),
AdaptiveSelection::SelectionVector { rows, .. } => rows.clone(),
AdaptiveSelection::VerbatimMask { mask } => {
mask.iter_set().map(|i| i as u32).collect()
}
AdaptiveSelection::Hybrid { nrows, chunks } => {
let mut out = Vec::with_capacity(self.count());
for (ci, chunk) in chunks.iter().enumerate() {
let base = (ci * HYBRID_CHUNK_SIZE) as u32;
let chunk_len = chunk_len_for(ci, *nrows);
match chunk {
HybridChunk::Empty => {}
HybridChunk::All => {
for off in 0..chunk_len as u32 {
out.push(base + off);
}
}
HybridChunk::Sparse(rows) => {
for &r in rows {
out.push(base + r as u32);
}
}
HybridChunk::Dense(words) => {
for (wi, &w) in words.iter().enumerate() {
let mut bits = w;
while bits != 0 {
let tz = bits.trailing_zeros() as usize;
let off = wi * 64 + tz;
if off < chunk_len {
out.push(base + off as u32);
}
bits &= bits - 1;
}
}
}
}
}
out
}
}
}
pub fn intersect(&self, other: &AdaptiveSelection) -> AdaptiveSelection {
assert_eq!(
self.nrows(),
other.nrows(),
"AdaptiveSelection::intersect: nrows mismatch ({} vs {})",
self.nrows(),
other.nrows()
);
match (self, other) {
(AdaptiveSelection::Empty { nrows }, _) | (_, AdaptiveSelection::Empty { nrows }) => {
return AdaptiveSelection::Empty { nrows: *nrows };
}
(AdaptiveSelection::All { .. }, _) => return other.clone(),
(_, AdaptiveSelection::All { .. }) => return self.clone(),
_ => {}
}
if let (
AdaptiveSelection::SelectionVector { rows: a, nrows },
AdaptiveSelection::SelectionVector { rows: b, .. },
) = (self, other)
{
let merged = sorted_merge_intersect(a, b);
return Self::classify_sparse(merged, *nrows);
}
if let (
AdaptiveSelection::SelectionVector { rows, nrows },
AdaptiveSelection::VerbatimMask { mask },
)
| (
AdaptiveSelection::VerbatimMask { mask },
AdaptiveSelection::SelectionVector { rows, nrows },
) = (self, other)
{
let filtered: Vec<u32> =
rows.iter().copied().filter(|&r| mask.get(r as usize)).collect();
return Self::classify_sparse(filtered, *nrows);
}
if let (
AdaptiveSelection::Hybrid { nrows, chunks: ac },
AdaptiveSelection::Hybrid { chunks: bc, .. },
) = (self, other)
{
let mut out = Vec::with_capacity(ac.len());
for ci in 0..ac.len() {
let chunk_len = chunk_len_for(ci, *nrows);
out.push(intersect_chunks(&ac[ci], &bc[ci], chunk_len));
}
return Self::simplify_hybrid(*nrows, out);
}
if let (
AdaptiveSelection::Hybrid { nrows, chunks },
AdaptiveSelection::SelectionVector { rows, .. },
)
| (
AdaptiveSelection::SelectionVector { rows, .. },
AdaptiveSelection::Hybrid { nrows, chunks },
) = (self, other)
{
let mut filtered: Vec<u32> = Vec::with_capacity(rows.len());
for &r in rows {
let row = r as usize;
let ci = row / HYBRID_CHUNK_SIZE;
let off = row % HYBRID_CHUNK_SIZE;
let chunk_len = chunk_len_for(ci, *nrows);
if chunks[ci].contains_local(off, chunk_len) {
filtered.push(r);
}
}
return Self::classify_sparse(filtered, *nrows);
}
if let (
AdaptiveSelection::Hybrid { nrows, chunks },
AdaptiveSelection::VerbatimMask { mask },
)
| (
AdaptiveSelection::VerbatimMask { mask },
AdaptiveSelection::Hybrid { nrows, chunks },
) = (self, other)
{
let words = mask.words_slice();
let mut out_chunks = Vec::with_capacity(chunks.len());
for ci in 0..chunks.len() {
let chunk_len = chunk_len_for(ci, *nrows);
let w_start = ci * HYBRID_WORDS_PER_CHUNK;
let w_end = (w_start + HYBRID_WORDS_PER_CHUNK).min(words.len());
let mask_chunk = &words[w_start..w_end];
out_chunks.push(intersect_chunk_with_words(
&chunks[ci],
mask_chunk,
chunk_len,
));
}
return Self::simplify_hybrid(*nrows, out_chunks);
}
let lhs = self.materialize_mask();
let rhs = other.materialize_mask();
let words: Vec<u64> = lhs
.words_slice()
.iter()
.zip(rhs.words_slice().iter())
.map(|(a, b)| a & b)
.collect();
AdaptiveSelection::from_predicate_result(words, lhs.nrows())
}
pub fn union(&self, other: &AdaptiveSelection) -> AdaptiveSelection {
assert_eq!(
self.nrows(),
other.nrows(),
"AdaptiveSelection::union: nrows mismatch ({} vs {})",
self.nrows(),
other.nrows()
);
match (self, other) {
(AdaptiveSelection::All { nrows }, _) | (_, AdaptiveSelection::All { nrows }) => {
return AdaptiveSelection::All { nrows: *nrows };
}
(AdaptiveSelection::Empty { .. }, _) => return other.clone(),
(_, AdaptiveSelection::Empty { .. }) => return self.clone(),
_ => {}
}
if let (
AdaptiveSelection::SelectionVector { rows: a, nrows },
AdaptiveSelection::SelectionVector { rows: b, .. },
) = (self, other)
{
let merged = sorted_merge_union(a, b);
if merged.len() >= *nrows / SPARSE_DIVISOR {
} else {
return Self::classify_sparse(merged, *nrows);
}
}
if let (
AdaptiveSelection::Hybrid { nrows, chunks: ac },
AdaptiveSelection::Hybrid { chunks: bc, .. },
) = (self, other)
{
let mut out = Vec::with_capacity(ac.len());
for ci in 0..ac.len() {
let chunk_len = chunk_len_for(ci, *nrows);
out.push(union_chunks(&ac[ci], &bc[ci], chunk_len));
}
return Self::simplify_hybrid(*nrows, out);
}
if let (
AdaptiveSelection::Hybrid { nrows, chunks },
AdaptiveSelection::SelectionVector { rows, .. },
)
| (
AdaptiveSelection::SelectionVector { rows, .. },
AdaptiveSelection::Hybrid { nrows, chunks },
) = (self, other)
{
let mut out_chunks: Vec<HybridChunk> = chunks.clone();
let mut i = 0usize;
while i < rows.len() {
let first_row = rows[i] as usize;
let ci = first_row / HYBRID_CHUNK_SIZE;
let chunk_base = ci * HYBRID_CHUNK_SIZE;
let chunk_end = chunk_base + HYBRID_CHUNK_SIZE;
let mut bucket: Vec<u16> = Vec::new();
while i < rows.len() && (rows[i] as usize) < chunk_end {
bucket.push((rows[i] as usize - chunk_base) as u16);
i += 1;
}
let chunk_len = chunk_len_for(ci, *nrows);
let bucket_chunk = if bucket.len() == chunk_len {
HybridChunk::All
} else {
HybridChunk::Sparse(bucket)
};
out_chunks[ci] = union_chunks(&out_chunks[ci], &bucket_chunk, chunk_len);
}
return Self::simplify_hybrid(*nrows, out_chunks);
}
if let (
AdaptiveSelection::Hybrid { nrows, chunks },
AdaptiveSelection::VerbatimMask { mask },
)
| (
AdaptiveSelection::VerbatimMask { mask },
AdaptiveSelection::Hybrid { nrows, chunks },
) = (self, other)
{
let words = mask.words_slice();
let mut out_chunks = Vec::with_capacity(chunks.len());
for ci in 0..chunks.len() {
let chunk_len = chunk_len_for(ci, *nrows);
let w_start = ci * HYBRID_WORDS_PER_CHUNK;
let w_end = (w_start + HYBRID_WORDS_PER_CHUNK).min(words.len());
let mask_chunk = &words[w_start..w_end];
out_chunks.push(union_chunk_with_words(
&chunks[ci],
mask_chunk,
chunk_len,
));
}
return Self::simplify_hybrid(*nrows, out_chunks);
}
let lhs = self.materialize_mask();
let rhs = other.materialize_mask();
let words: Vec<u64> = lhs
.words_slice()
.iter()
.zip(rhs.words_slice().iter())
.map(|(a, b)| a | b)
.collect();
AdaptiveSelection::from_predicate_result(words, lhs.nrows())
}
pub fn explain_selection_mode(&self) -> &'static str {
match self {
AdaptiveSelection::Empty { .. } => "Empty",
AdaptiveSelection::All { .. } => "All",
AdaptiveSelection::SelectionVector { .. } => "SelectionVector",
AdaptiveSelection::VerbatimMask { .. } => "VerbatimMask",
AdaptiveSelection::Hybrid { .. } => "Hybrid",
}
}
fn classify_sparse(rows: Vec<u32>, nrows: usize) -> Self {
if rows.is_empty() {
AdaptiveSelection::Empty { nrows }
} else if rows.len() == nrows {
AdaptiveSelection::All { nrows }
} else {
AdaptiveSelection::SelectionVector { rows, nrows }
}
}
fn simplify_hybrid(nrows: usize, chunks: Vec<HybridChunk>) -> AdaptiveSelection {
let mut total = 0usize;
for (i, c) in chunks.iter().enumerate() {
total += c.count(chunk_len_for(i, nrows));
}
if total == 0 {
return AdaptiveSelection::Empty { nrows };
}
if total == nrows {
return AdaptiveSelection::All { nrows };
}
AdaptiveSelection::Hybrid { nrows, chunks }
}
}
pub enum SelectionIndices<'a> {
Empty,
Range(std::ops::Range<usize>),
Vec(std::slice::Iter<'a, u32>),
Mask {
mask: &'a BitMask,
next: usize,
nrows: usize,
},
Hybrid {
chunks: &'a [HybridChunk],
nrows: usize,
ci: usize,
inner: HybridInner<'a>,
},
}
pub enum HybridInner<'a> {
Start,
AllRange { base: u32, next: u32, end: u32 },
Sparse {
base: u32,
iter: std::slice::Iter<'a, u16>,
},
Dense {
base: u32,
words: &'a [u64],
wi: usize,
bits: u64,
chunk_len: usize,
},
}
impl<'a> Iterator for SelectionIndices<'a> {
type Item = usize;
fn next(&mut self) -> Option<usize> {
match self {
SelectionIndices::Empty => None,
SelectionIndices::Range(r) => r.next(),
SelectionIndices::Vec(it) => it.next().map(|&v| v as usize),
SelectionIndices::Mask { mask, next, nrows } => {
while *next < *nrows {
let i = *next;
*next += 1;
if mask.get(i) {
return Some(i);
}
}
None
}
SelectionIndices::Hybrid {
chunks,
nrows,
ci,
inner,
} => loop {
match inner {
HybridInner::Start => {
}
HybridInner::AllRange { base, next, end } => {
if *next < *end {
let v = *next;
*next += 1;
return Some((*base + v) as usize);
}
}
HybridInner::Sparse { base, iter } => {
if let Some(&off) = iter.next() {
return Some((*base + off as u32) as usize);
}
}
HybridInner::Dense {
base,
words,
wi,
bits,
chunk_len,
} => {
loop {
if *bits != 0 {
let tz = bits.trailing_zeros() as usize;
let off = *wi * 64 + tz;
*bits &= *bits - 1;
if off < *chunk_len {
return Some((*base + off as u32) as usize);
}
continue;
}
*wi += 1;
if *wi >= words.len() {
break; }
*bits = words[*wi];
}
}
}
if *ci >= chunks.len() {
return None;
}
let chunk = &chunks[*ci];
let chunk_idx = *ci;
*ci += 1;
let chunk_len = chunk_len_for(chunk_idx, *nrows);
let base = (chunk_idx * HYBRID_CHUNK_SIZE) as u32;
*inner = match chunk {
HybridChunk::Empty => HybridInner::Start,
HybridChunk::All => HybridInner::AllRange {
base,
next: 0,
end: chunk_len as u32,
},
HybridChunk::Sparse(rows) => HybridInner::Sparse {
base,
iter: rows.iter(),
},
HybridChunk::Dense(words) => HybridInner::Dense {
base,
words: &words[..],
wi: 0,
bits: words[0],
chunk_len,
},
};
},
}
}
}
fn words_to_indices(words: &[u64]) -> Vec<u32> {
let mut out = Vec::with_capacity(64);
for (wi, &w) in words.iter().enumerate() {
let mut bits = w;
while bits != 0 {
let tz = bits.trailing_zeros() as usize;
let row = wi * 64 + tz;
out.push(row as u32);
bits &= bits - 1;
}
}
out
}
fn bitmask_from_words(mut words: Vec<u64>, nrows: usize) -> BitMask {
let nwords = (nrows + 63) / 64;
if words.len() < nwords {
words.resize(nwords, 0);
} else if words.len() > nwords {
words.truncate(nwords);
}
if nwords > 0 && nrows % 64 != 0 {
let tail = nrows % 64;
words[nwords - 1] &= (1u64 << tail) - 1;
}
BitMask::from_words_for_test(words, nrows)
}
#[inline]
fn chunk_len_for(ci: usize, nrows: usize) -> usize {
let base = ci * HYBRID_CHUNK_SIZE;
let remaining = nrows.saturating_sub(base);
remaining.min(HYBRID_CHUNK_SIZE)
}
fn chunks_from_mask(mask: &BitMask) -> Vec<HybridChunk> {
let nrows = mask.nrows();
let total_words = mask.words_slice().len();
let nchunks = (nrows + HYBRID_CHUNK_SIZE - 1) / HYBRID_CHUNK_SIZE;
let mut chunks = Vec::with_capacity(nchunks);
let words = mask.words_slice();
for ci in 0..nchunks {
let chunk_len = chunk_len_for(ci, nrows);
let w_start = ci * HYBRID_WORDS_PER_CHUNK;
let w_end = (w_start + HYBRID_WORDS_PER_CHUNK).min(total_words);
let chunk_words = &words[w_start..w_end];
let count: usize = chunk_words.iter().map(|w| w.count_ones() as usize).sum();
let chunk = if count == 0 {
HybridChunk::Empty
} else if count == chunk_len {
HybridChunk::All
} else if count < HYBRID_CHUNK_SPARSE_THRESHOLD {
let mut offs: Vec<u16> = Vec::with_capacity(count);
for (i, &w) in chunk_words.iter().enumerate() {
let mut bits = w;
while bits != 0 {
let tz = bits.trailing_zeros() as usize;
let off = i * 64 + tz;
if off < chunk_len {
offs.push(off as u16);
}
bits &= bits - 1;
}
}
HybridChunk::Sparse(offs)
} else {
let mut buf = vec![0u64; HYBRID_WORDS_PER_CHUNK];
for (i, &w) in chunk_words.iter().enumerate() {
buf[i] = w;
}
HybridChunk::Dense(buf.into_boxed_slice())
};
chunks.push(chunk);
}
chunks
}
fn classify_sparse_chunk(offs: Vec<u16>, chunk_len: usize) -> HybridChunk {
if offs.is_empty() {
HybridChunk::Empty
} else if offs.len() == chunk_len {
HybridChunk::All
} else {
HybridChunk::Sparse(offs)
}
}
fn classify_dense_chunk(buf: Vec<u64>, chunk_len: usize) -> HybridChunk {
let count: usize = buf.iter().map(|w| w.count_ones() as usize).sum();
if count == 0 {
HybridChunk::Empty
} else if count == chunk_len {
HybridChunk::All
} else if count < HYBRID_CHUNK_SPARSE_THRESHOLD {
let mut offs = Vec::with_capacity(count);
for (i, &w) in buf.iter().enumerate() {
let mut bits = w;
while bits != 0 {
let tz = bits.trailing_zeros() as usize;
let off = i * 64 + tz;
if off < chunk_len {
offs.push(off as u16);
}
bits &= bits - 1;
}
}
HybridChunk::Sparse(offs)
} else {
HybridChunk::Dense(buf.into_boxed_slice())
}
}
fn merge_intersect_u16(a: &[u16], b: &[u16]) -> Vec<u16> {
let mut out = Vec::with_capacity(a.len().min(b.len()));
let (mut i, mut j) = (0, 0);
while i < a.len() && j < b.len() {
match a[i].cmp(&b[j]) {
std::cmp::Ordering::Less => i += 1,
std::cmp::Ordering::Greater => j += 1,
std::cmp::Ordering::Equal => {
out.push(a[i]);
i += 1;
j += 1;
}
}
}
out
}
fn merge_union_u16(a: &[u16], b: &[u16]) -> Vec<u16> {
let mut out = Vec::with_capacity(a.len() + b.len());
let (mut i, mut j) = (0, 0);
while i < a.len() && j < b.len() {
match a[i].cmp(&b[j]) {
std::cmp::Ordering::Less => {
out.push(a[i]);
i += 1;
}
std::cmp::Ordering::Greater => {
out.push(b[j]);
j += 1;
}
std::cmp::Ordering::Equal => {
out.push(a[i]);
i += 1;
j += 1;
}
}
}
out.extend_from_slice(&a[i..]);
out.extend_from_slice(&b[j..]);
out
}
fn intersect_chunks(a: &HybridChunk, b: &HybridChunk, chunk_len: usize) -> HybridChunk {
use HybridChunk::*;
match (a, b) {
(Empty, _) | (_, Empty) => Empty,
(All, x) | (x, All) => x.clone(),
(Sparse(ax), Sparse(bx)) => {
let merged = merge_intersect_u16(ax, bx);
classify_sparse_chunk(merged, chunk_len)
}
(Sparse(offs), Dense(words)) | (Dense(words), Sparse(offs)) => {
let mut out: Vec<u16> = Vec::with_capacity(offs.len());
for &off in offs {
let off_u = off as usize;
let wi = off_u / 64;
let bi = off_u % 64;
if (words[wi] >> bi) & 1 == 1 {
out.push(off);
}
}
classify_sparse_chunk(out, chunk_len)
}
(Dense(aw), Dense(bw)) => {
let mut buf = vec![0u64; HYBRID_WORDS_PER_CHUNK];
for i in 0..HYBRID_WORDS_PER_CHUNK {
buf[i] = aw[i] & bw[i];
}
classify_dense_chunk(buf, chunk_len)
}
}
}
fn union_chunks(a: &HybridChunk, b: &HybridChunk, chunk_len: usize) -> HybridChunk {
use HybridChunk::*;
match (a, b) {
(All, _) | (_, All) => All,
(Empty, x) | (x, Empty) => x.clone(),
(Sparse(ax), Sparse(bx)) => {
let merged = merge_union_u16(ax, bx);
if merged.len() == chunk_len {
All
} else if merged.len() < HYBRID_CHUNK_SPARSE_THRESHOLD {
if merged.is_empty() {
Empty
} else {
Sparse(merged)
}
} else {
let mut buf = vec![0u64; HYBRID_WORDS_PER_CHUNK];
for &off in &merged {
let off = off as usize;
buf[off / 64] |= 1u64 << (off % 64);
}
classify_dense_chunk(buf, chunk_len)
}
}
(Sparse(offs), Dense(words)) | (Dense(words), Sparse(offs)) => {
let mut buf: Vec<u64> = words.iter().copied().collect();
for &off in offs {
let off = off as usize;
buf[off / 64] |= 1u64 << (off % 64);
}
classify_dense_chunk(buf, chunk_len)
}
(Dense(aw), Dense(bw)) => {
let mut buf = vec![0u64; HYBRID_WORDS_PER_CHUNK];
for i in 0..HYBRID_WORDS_PER_CHUNK {
buf[i] = aw[i] | bw[i];
}
classify_dense_chunk(buf, chunk_len)
}
}
}
fn intersect_chunk_with_words(
chunk: &HybridChunk,
mask_words: &[u64],
chunk_len: usize,
) -> HybridChunk {
use HybridChunk::*;
match chunk {
Empty => Empty,
All => {
let mut buf = vec![0u64; HYBRID_WORDS_PER_CHUNK];
for (i, &w) in mask_words.iter().enumerate() {
buf[i] = w;
}
classify_dense_chunk(buf, chunk_len)
}
Sparse(offs) => {
let mut out: Vec<u16> = Vec::with_capacity(offs.len());
for &off in offs {
let off_u = off as usize;
let wi = off_u / 64;
if wi < mask_words.len() {
let bi = off_u % 64;
if (mask_words[wi] >> bi) & 1 == 1 {
out.push(off);
}
}
}
classify_sparse_chunk(out, chunk_len)
}
Dense(words) => {
let mut buf = vec![0u64; HYBRID_WORDS_PER_CHUNK];
for i in 0..HYBRID_WORDS_PER_CHUNK {
let mw = if i < mask_words.len() { mask_words[i] } else { 0 };
buf[i] = words[i] & mw;
}
classify_dense_chunk(buf, chunk_len)
}
}
}
fn union_chunk_with_words(
chunk: &HybridChunk,
mask_words: &[u64],
chunk_len: usize,
) -> HybridChunk {
use HybridChunk::*;
match chunk {
Empty => {
let mut buf = vec![0u64; HYBRID_WORDS_PER_CHUNK];
for (i, &w) in mask_words.iter().enumerate() {
buf[i] = w;
}
classify_dense_chunk(buf, chunk_len)
}
All => All,
Sparse(offs) => {
let mut buf = vec![0u64; HYBRID_WORDS_PER_CHUNK];
for (i, &w) in mask_words.iter().enumerate() {
buf[i] = w;
}
for &off in offs {
let off = off as usize;
buf[off / 64] |= 1u64 << (off % 64);
}
classify_dense_chunk(buf, chunk_len)
}
Dense(words) => {
let mut buf = vec![0u64; HYBRID_WORDS_PER_CHUNK];
for i in 0..HYBRID_WORDS_PER_CHUNK {
let mw = if i < mask_words.len() { mask_words[i] } else { 0 };
buf[i] = words[i] | mw;
}
classify_dense_chunk(buf, chunk_len)
}
}
}
fn sorted_merge_intersect(a: &[u32], b: &[u32]) -> Vec<u32> {
let mut out = Vec::with_capacity(a.len().min(b.len()));
let (mut i, mut j) = (0, 0);
while i < a.len() && j < b.len() {
match a[i].cmp(&b[j]) {
std::cmp::Ordering::Less => i += 1,
std::cmp::Ordering::Greater => j += 1,
std::cmp::Ordering::Equal => {
out.push(a[i]);
i += 1;
j += 1;
}
}
}
out
}
fn sorted_merge_union(a: &[u32], b: &[u32]) -> Vec<u32> {
let mut out = Vec::with_capacity(a.len() + b.len());
let (mut i, mut j) = (0, 0);
while i < a.len() && j < b.len() {
match a[i].cmp(&b[j]) {
std::cmp::Ordering::Less => {
out.push(a[i]);
i += 1;
}
std::cmp::Ordering::Greater => {
out.push(b[j]);
j += 1;
}
std::cmp::Ordering::Equal => {
out.push(a[i]);
i += 1;
j += 1;
}
}
}
out.extend_from_slice(&a[i..]);
out.extend_from_slice(&b[j..]);
out
}
#[cfg(test)]
mod tests {
use super::*;
fn mk_bools(n: usize, set: &[usize]) -> Vec<bool> {
let mut v = vec![false; n];
for &i in set {
v[i] = true;
}
v
}
fn classify_from_bools(bools: &[bool]) -> AdaptiveSelection {
let mask = BitMask::from_bools(bools);
let words: Vec<u64> = mask.words_slice().to_vec();
AdaptiveSelection::from_predicate_result(words, bools.len())
}
#[test]
fn empty_when_no_bits_set() {
let s = classify_from_bools(&mk_bools(100, &[]));
assert_eq!(s.explain_selection_mode(), "Empty");
assert_eq!(s.count(), 0);
assert_eq!(s.nrows(), 100);
}
#[test]
fn all_when_every_bit_set() {
let s = classify_from_bools(&vec![true; 100]);
assert_eq!(s.explain_selection_mode(), "All");
assert_eq!(s.count(), 100);
assert_eq!(s.nrows(), 100);
}
#[test]
fn sparse_picks_selection_vector() {
let mut hits = vec![];
for i in (0..100_000).step_by(2_000) {
hits.push(i);
}
assert_eq!(hits.len(), 50);
let s = classify_from_bools(&mk_bools(100_000, &hits));
assert_eq!(s.explain_selection_mode(), "SelectionVector");
assert_eq!(s.count(), 50);
}
#[test]
fn dense_picks_verbatim_mask() {
let hits: Vec<usize> = (0..1000).step_by(2).collect();
let s = classify_from_bools(&mk_bools(1000, &hits));
assert_eq!(s.explain_selection_mode(), "VerbatimMask");
assert_eq!(s.count(), 500);
}
#[test]
fn mid_band_small_frame_stays_verbatim() {
let hits: Vec<usize> = (0..1000).step_by(5).collect();
assert_eq!(hits.len(), 200);
let s = classify_from_bools(&mk_bools(1000, &hits));
assert_eq!(s.explain_selection_mode(), "VerbatimMask");
assert_eq!(s.count(), 200);
}
#[test]
fn mid_band_large_frame_picks_hybrid() {
let hits: Vec<usize> = (0..100_000).step_by(20).collect();
assert_eq!(hits.len(), 5_000);
let s = classify_from_bools(&mk_bools(100_000, &hits));
assert_eq!(s.explain_selection_mode(), "Hybrid");
assert_eq!(s.count(), 5_000);
}
#[test]
fn count_and_contains_agree_for_sparse() {
let hits = vec![3usize, 17, 99, 5_000];
let s = classify_from_bools(&mk_bools(100_000, &hits));
assert_eq!(s.explain_selection_mode(), "SelectionVector");
for h in &hits {
assert!(s.contains(*h), "expected contains({})", h);
}
assert!(!s.contains(0));
assert!(!s.contains(100_001)); }
#[test]
fn iter_indices_ascending_for_every_arm() {
let hybrid_hits: Vec<usize> = (0..100_000).step_by(20).collect();
let cases: Vec<(usize, Vec<usize>, &'static str)> = vec![
(10, vec![], "Empty"),
(10, (0..10).collect(), "All"),
(100_000, vec![3, 17, 99, 5_000], "SelectionVector"),
(1000, (0..1000).step_by(2).collect(), "VerbatimMask"),
(100_000, hybrid_hits, "Hybrid"),
];
for (nrows, hits, expected_mode) in cases {
let s = classify_from_bools(&mk_bools(nrows, &hits));
assert_eq!(s.explain_selection_mode(), expected_mode);
let collected: Vec<usize> = s.iter_indices().collect();
assert_eq!(collected, hits, "{expected_mode} iter mismatch");
for w in collected.windows(2) {
assert!(w[0] < w[1]);
}
}
}
#[test]
fn materialize_round_trip_agrees_for_every_arm() {
let hits = vec![1usize, 5, 10, 64, 65, 200];
let nrows = 256;
let s = classify_from_bools(&mk_bools(nrows, &hits));
let mask = s.materialize_mask();
let idx = s.materialize_indices();
assert_eq!(mask.count_ones(), hits.len());
assert_eq!(idx.len(), hits.len());
for h in &hits {
assert!(mask.get(*h));
assert!(idx.binary_search(&(*h as u32)).is_ok());
}
}
#[test]
fn intersect_identity_with_all() {
let s = classify_from_bools(&mk_bools(1000, &[1, 2, 3]));
let all = AdaptiveSelection::all(1000);
let r = s.intersect(&all);
assert_eq!(r.count(), 3);
assert!(r.contains(1) && r.contains(2) && r.contains(3));
}
#[test]
fn intersect_with_empty_is_empty() {
let s = classify_from_bools(&mk_bools(1000, &[1, 2, 3]));
let empty = AdaptiveSelection::empty(1000);
let r = s.intersect(&empty);
assert_eq!(r.explain_selection_mode(), "Empty");
assert_eq!(r.count(), 0);
}
#[test]
fn union_with_all_is_all() {
let s = classify_from_bools(&mk_bools(1000, &[1, 2, 3]));
let all = AdaptiveSelection::all(1000);
let r = s.union(&all);
assert_eq!(r.explain_selection_mode(), "All");
assert_eq!(r.count(), 1000);
}
#[test]
fn intersect_mode_mixing_sparse_and_dense_gives_sparse_or_empty() {
let sparse = classify_from_bools(&mk_bools(100_000, &[3, 17, 99, 5_000]));
let dense_hits: Vec<usize> = (0..100_000).step_by(2).collect();
let dense = classify_from_bools(&mk_bools(100_000, &dense_hits));
assert_eq!(dense.explain_selection_mode(), "VerbatimMask"); assert_eq!(sparse.explain_selection_mode(), "SelectionVector");
let r = sparse.intersect(&dense);
assert_eq!(r.count(), 1);
assert!(r.contains(5_000));
}
#[test]
fn intersect_is_commutative_and_associative_for_three_inputs() {
let a = classify_from_bools(&mk_bools(256, &[1, 5, 7, 99]));
let b = classify_from_bools(&mk_bools(256, &[5, 99, 100, 200]));
let c = classify_from_bools(&mk_bools(256, &[5, 50, 99, 250]));
let ab_c = a.intersect(&b).intersect(&c);
let bc_a = b.intersect(&c).intersect(&a);
let ba_c = b.intersect(&a).intersect(&c);
assert_eq!(ab_c.materialize_indices(), bc_a.materialize_indices());
assert_eq!(ab_c.materialize_indices(), ba_c.materialize_indices());
}
#[test]
fn small_nrows_never_classifies_as_sparse() {
let s = classify_from_bools(&mk_bools(10, &[3]));
assert_ne!(s.explain_selection_mode(), "SelectionVector");
}
#[test]
fn dense_threshold_is_exclusive_30_percent() {
let hits: Vec<usize> = (0..300).collect();
let s = classify_from_bools(&mk_bools(1000, &hits));
assert_eq!(s.explain_selection_mode(), "VerbatimMask");
}
#[test]
fn sparse_iter_uses_word_skipping() {
let hits = vec![100_usize, 50_000, 200_000, 500_000, 999_000];
let s = classify_from_bools(&mk_bools(1_000_000, &hits));
assert_eq!(s.explain_selection_mode(), "SelectionVector");
let collected: Vec<usize> = s.iter_indices().collect();
assert_eq!(collected, hits);
}
#[test]
fn hybrid_iter_matches_bitmask_iter_under_bimodal_density() {
let mut hits: Vec<usize> = Vec::new();
for i in 50_000..100_000 {
if i % 2 == 0 {
hits.push(i);
}
}
let bools = mk_bools(100_000, &hits);
let s = classify_from_bools(&bools);
assert_eq!(s.explain_selection_mode(), "Hybrid");
let collected: Vec<usize> = s.iter_indices().collect();
assert_eq!(collected, hits);
assert_eq!(s.count(), hits.len());
}
#[test]
fn hybrid_contains_matches_bitmask() {
let hits: Vec<usize> = (0..100_000).step_by(20).collect();
let s = classify_from_bools(&mk_bools(100_000, &hits));
assert_eq!(s.explain_selection_mode(), "Hybrid");
for h in &hits {
assert!(s.contains(*h));
}
assert!(!s.contains(99_999));
assert!(!s.contains(100_001));
}
#[test]
fn hybrid_materialize_round_trip() {
let hits: Vec<usize> = (0..100_000).step_by(20).collect();
let s = classify_from_bools(&mk_bools(100_000, &hits));
assert_eq!(s.explain_selection_mode(), "Hybrid");
let bm = s.materialize_mask();
assert_eq!(bm.count_ones(), hits.len());
let idx = s.materialize_indices();
assert_eq!(idx.len(), hits.len());
for h in &hits {
assert!(bm.get(*h));
assert!(idx.binary_search(&(*h as u32)).is_ok());
}
}
#[test]
fn hybrid_chunks_pick_local_modes() {
let nrows = 8 * HYBRID_CHUNK_SIZE;
let mut hits = Vec::new();
for i in 0..HYBRID_CHUNK_SIZE {
hits.push(i);
}
for k in 0..5 {
hits.push(2 * HYBRID_CHUNK_SIZE + k * 100);
}
for off in (0..HYBRID_CHUNK_SIZE).step_by(4) {
hits.push(3 * HYBRID_CHUNK_SIZE + off);
}
let s = classify_from_bools(&mk_bools(nrows, &hits));
assert_eq!(s.explain_selection_mode(), "Hybrid");
if let AdaptiveSelection::Hybrid { chunks, .. } = &s {
assert_eq!(chunks.len(), 8);
assert!(matches!(chunks[0], HybridChunk::All));
assert!(matches!(chunks[1], HybridChunk::Empty));
assert!(matches!(chunks[2], HybridChunk::Sparse(_)));
assert!(matches!(chunks[3], HybridChunk::Dense(_)));
assert!(matches!(chunks[7], HybridChunk::Empty));
} else {
panic!("expected Hybrid");
}
}
#[test]
fn sparse_intersect_sparse_uses_merge_walk() {
let a = classify_from_bools(&mk_bools(100_000, &[1, 5, 17, 99, 5_000, 50_000]));
let b = classify_from_bools(&mk_bools(100_000, &[5, 17, 200, 5_000, 99_000]));
assert_eq!(a.explain_selection_mode(), "SelectionVector");
assert_eq!(b.explain_selection_mode(), "SelectionVector");
let r = a.intersect(&b);
let collected: Vec<usize> = r.iter_indices().collect();
assert_eq!(collected, vec![5usize, 17, 5_000]);
}
#[test]
fn sparse_union_sparse_uses_merge_walk() {
let a = classify_from_bools(&mk_bools(100_000, &[1, 5, 17]));
let b = classify_from_bools(&mk_bools(100_000, &[5, 99, 200]));
let r = a.union(&b);
let collected: Vec<usize> = r.iter_indices().collect();
assert_eq!(collected, vec![1usize, 5, 17, 99, 200]);
}
#[test]
fn intersect_cardinality_identity_holds_across_modes() {
let a = classify_from_bools(&mk_bools(100_000, &[1, 5, 17, 99, 5_000, 50_000]));
let b = classify_from_bools(&mk_bools(100_000, &[5, 17, 200, 5_000, 99_000]));
let inter = a.intersect(&b);
let union = a.union(&b);
assert_eq!(a.count() + b.count(), inter.count() + union.count());
}
#[test]
fn merge_walk_intersect_helper_is_correct() {
let a = vec![1u32, 3, 5, 7, 9];
let b = vec![2u32, 3, 5, 6, 9, 11];
let out = sorted_merge_intersect(&a, &b);
assert_eq!(out, vec![3, 5, 9]);
}
#[test]
fn merge_walk_union_helper_is_correct() {
let a = vec![1u32, 3, 5];
let b = vec![2u32, 3, 7];
let out = sorted_merge_union(&a, &b);
assert_eq!(out, vec![1, 2, 3, 5, 7]);
}
fn hybrid_from_hits(nrows: usize, hits: &[usize]) -> AdaptiveSelection {
let s = classify_from_bools(&mk_bools(nrows, hits));
assert_eq!(
s.explain_selection_mode(),
"Hybrid",
"expected Hybrid for {nrows} rows × {} hits",
hits.len()
);
s
}
fn oracle_intersect(a: &AdaptiveSelection, b: &AdaptiveSelection) -> Vec<usize> {
let am = a.materialize_mask();
let bm = b.materialize_mask();
let n = am.nrows();
let mut out = Vec::new();
for i in 0..n {
if am.get(i) && bm.get(i) {
out.push(i);
}
}
out
}
fn oracle_union(a: &AdaptiveSelection, b: &AdaptiveSelection) -> Vec<usize> {
let am = a.materialize_mask();
let bm = b.materialize_mask();
let n = am.nrows();
let mut out = Vec::new();
for i in 0..n {
if am.get(i) || bm.get(i) {
out.push(i);
}
}
out
}
#[test]
fn phase3_hybrid_intersect_hybrid_matches_oracle() {
let a_hits: Vec<usize> = (0..100_000).step_by(20).collect(); let b_hits: Vec<usize> = (0..100_000).step_by(15).collect(); let a = hybrid_from_hits(100_000, &a_hits);
let b = hybrid_from_hits(100_000, &b_hits);
let r = a.intersect(&b);
let collected: Vec<usize> = r.iter_indices().collect();
assert_eq!(collected, oracle_intersect(&a, &b));
}
#[test]
fn phase3_hybrid_union_hybrid_matches_oracle() {
let a_hits: Vec<usize> = (0..100_000).step_by(20).collect();
let b_hits: Vec<usize> = (0..100_000).step_by(15).collect();
let a = hybrid_from_hits(100_000, &a_hits);
let b = hybrid_from_hits(100_000, &b_hits);
let r = a.union(&b);
let collected: Vec<usize> = r.iter_indices().collect();
assert_eq!(collected, oracle_union(&a, &b));
}
#[test]
fn phase3_hybrid_intersect_sparse_matches_oracle() {
let a_hits: Vec<usize> = (0..100_000).step_by(20).collect(); let b_hits: Vec<usize> = vec![100, 5_000, 50_020, 99_980]; let a = hybrid_from_hits(100_000, &a_hits);
let b = classify_from_bools(&mk_bools(100_000, &b_hits));
assert_eq!(b.explain_selection_mode(), "SelectionVector");
let r = a.intersect(&b);
let collected: Vec<usize> = r.iter_indices().collect();
assert_eq!(collected, oracle_intersect(&a, &b));
let r2 = b.intersect(&a);
let collected2: Vec<usize> = r2.iter_indices().collect();
assert_eq!(collected2, oracle_intersect(&a, &b));
}
#[test]
fn phase3_hybrid_union_sparse_matches_oracle() {
let a_hits: Vec<usize> = (0..100_000).step_by(20).collect();
let b_hits: Vec<usize> = vec![1, 50, 100, 5_000, 50_020, 99_999];
let a = hybrid_from_hits(100_000, &a_hits);
let b = classify_from_bools(&mk_bools(100_000, &b_hits));
assert_eq!(b.explain_selection_mode(), "SelectionVector");
let r = a.union(&b);
let collected: Vec<usize> = r.iter_indices().collect();
assert_eq!(collected, oracle_union(&a, &b));
}
#[test]
fn phase3_hybrid_intersect_verbatim_matches_oracle() {
let a_hits: Vec<usize> = (0..100_000).step_by(20).collect(); let b_hits: Vec<usize> = (0..100_000).step_by(2).collect(); let a = hybrid_from_hits(100_000, &a_hits);
let b = classify_from_bools(&mk_bools(100_000, &b_hits));
assert_eq!(b.explain_selection_mode(), "VerbatimMask");
let r = a.intersect(&b);
let collected: Vec<usize> = r.iter_indices().collect();
assert_eq!(collected, oracle_intersect(&a, &b));
let r2 = b.intersect(&a);
assert_eq!(
r2.iter_indices().collect::<Vec<usize>>(),
oracle_intersect(&a, &b)
);
}
#[test]
fn phase3_hybrid_union_verbatim_matches_oracle() {
let a_hits: Vec<usize> = (0..100_000).step_by(20).collect();
let b_hits: Vec<usize> = (0..100_000).step_by(2).collect();
let a = hybrid_from_hits(100_000, &a_hits);
let b = classify_from_bools(&mk_bools(100_000, &b_hits));
let r = a.union(&b);
let collected: Vec<usize> = r.iter_indices().collect();
assert_eq!(collected, oracle_union(&a, &b));
}
#[test]
fn phase3_hybrid_chain_intersect_three_way() {
let a_hits: Vec<usize> = (0..100_000).step_by(20).collect();
let b_hits: Vec<usize> = (0..100_000).step_by(15).collect();
let c_hits: Vec<usize> = (0..100_000).step_by(12).collect();
let a = hybrid_from_hits(100_000, &a_hits);
let b = hybrid_from_hits(100_000, &b_hits);
let c = hybrid_from_hits(100_000, &c_hits);
let abc = a.intersect(&b).intersect(&c);
let cba = c.intersect(&b).intersect(&a);
assert_eq!(abc.materialize_indices(), cba.materialize_indices());
let am = a.materialize_mask();
let bm = b.materialize_mask();
let cm = c.materialize_mask();
let oracle: Vec<usize> = (0..100_000)
.filter(|&i| am.get(i) && bm.get(i) && cm.get(i))
.collect();
assert_eq!(abc.iter_indices().collect::<Vec<usize>>(), oracle);
}
#[test]
fn phase3_per_chunk_intersect_sparse_sparse_uses_merge_walk() {
let a_hits: Vec<usize> = (0..100_000).step_by(200).collect();
let b_hits: Vec<usize> = (100..100_000).step_by(200).collect();
let a = hybrid_from_hits(100_000, &a_hits);
let b = hybrid_from_hits(100_000, &b_hits);
let r = a.intersect(&b);
assert_eq!(r.count(), 0);
assert_eq!(r.explain_selection_mode(), "Empty");
}
#[test]
fn phase3_per_chunk_union_sparse_sparse_promotes_to_dense_when_large() {
let a_hits: Vec<usize> = (0..100_000).step_by(30).collect();
let b_hits: Vec<usize> = (15..100_000).step_by(30).collect();
let a_count = a_hits.len();
let b_count = b_hits.len();
let a = hybrid_from_hits(100_000, &a_hits);
let b = hybrid_from_hits(100_000, &b_hits);
let r = a.union(&b);
assert_eq!(r.count(), a_count + b_count);
assert_eq!(
r.iter_indices().collect::<Vec<usize>>(),
oracle_union(&a, &b)
);
}
#[test]
fn phase3_simplify_hybrid_collapses_to_all_when_full() {
let a_hits: Vec<usize> = (0..100_000).filter(|i| i % 2 == 0).collect();
let b_hits: Vec<usize> = (0..100_000).filter(|i| i % 2 == 1).collect();
let a_hits: Vec<usize> = (0..100_000).filter(|i| i % 5 != 0).collect(); let b_hits: Vec<usize> = (0..100_000).step_by(5).collect(); let _ = (a_hits, b_hits); let nrows = 8 * HYBRID_CHUNK_SIZE;
let chunks = vec![HybridChunk::All; 8];
let s = AdaptiveSelection::simplify_hybrid(nrows, chunks);
assert_eq!(s.explain_selection_mode(), "All");
assert_eq!(s.count(), nrows);
}
#[test]
fn phase3_simplify_hybrid_collapses_to_empty_when_drained() {
let nrows = 8 * HYBRID_CHUNK_SIZE;
let chunks = vec![HybridChunk::Empty; 8];
let s = AdaptiveSelection::simplify_hybrid(nrows, chunks);
assert_eq!(s.explain_selection_mode(), "Empty");
assert_eq!(s.count(), 0);
}
#[test]
fn phase3_per_chunk_helpers_handle_partial_final_chunk() {
let nrows = 2 * HYBRID_CHUNK_SIZE + 808;
let a_hits: Vec<usize> = (0..nrows).step_by(20).collect();
let b_hits: Vec<usize> = (0..nrows).step_by(15).collect();
let a = hybrid_from_hits(nrows, &a_hits);
let b = hybrid_from_hits(nrows, &b_hits);
let r = a.intersect(&b);
let collected: Vec<usize> = r.iter_indices().collect();
assert_eq!(collected, oracle_intersect(&a, &b));
for &row in &collected {
assert!(row < nrows, "row {row} out of bounds");
}
}
#[test]
fn phase3_intersect_chunks_helper_dense_and_sparse() {
let chunk_len = HYBRID_CHUNK_SIZE;
let sparse = HybridChunk::Sparse(vec![0u16, 5, 17, 100, 4000]);
let mut dense_words = vec![0u64; HYBRID_WORDS_PER_CHUNK];
for off in [5usize, 17, 4000] {
dense_words[off / 64] |= 1u64 << (off % 64);
}
let dense = HybridChunk::Dense(dense_words.into_boxed_slice());
let r = intersect_chunks(&sparse, &dense, chunk_len);
match r {
HybridChunk::Sparse(offs) => assert_eq!(offs, vec![5, 17, 4000]),
other => panic!("expected Sparse, got {other:?}"),
}
}
#[test]
fn phase3_union_chunks_helper_demotes_to_sparse_when_small() {
let chunk_len = HYBRID_CHUNK_SIZE;
let a = HybridChunk::Sparse(vec![1u16, 2, 3]);
let b = HybridChunk::Sparse(vec![4u16, 5, 6]);
let r = union_chunks(&a, &b, chunk_len);
match r {
HybridChunk::Sparse(offs) => assert_eq!(offs, vec![1, 2, 3, 4, 5, 6]),
other => panic!("expected Sparse, got {other:?}"),
}
}
#[test]
fn phase3_union_chunks_helper_promotes_to_dense_above_threshold() {
let chunk_len = HYBRID_CHUNK_SIZE;
let a_offs: Vec<u16> = (0..70).map(|i| i * 2).collect();
let b_offs: Vec<u16> = (0..70).map(|i| i * 2 + 1).collect();
let a = HybridChunk::Sparse(a_offs);
let b = HybridChunk::Sparse(b_offs);
let r = union_chunks(&a, &b, chunk_len);
match r {
HybridChunk::Dense(words) => {
let count: usize = words.iter().map(|w| w.count_ones() as usize).sum();
assert_eq!(count, 140);
}
other => panic!("expected Dense (overflow), got {other:?}"),
}
}
}