use std::collections::BTreeSet;
use std::ops::Range;
use vortex_scan::selection::Selection;
use crate::scan::IDEAL_SPLIT_SIZE;
const MAX_RANGE_SIZE: u64 = IDEAL_SPLIT_SIZE / 25;
const MIN_GAP_BETWEEN_RANGES: u64 = IDEAL_SPLIT_SIZE / 2;
pub enum Splits {
Natural(BTreeSet<u64>),
Ranges(Vec<Range<u64>>),
}
pub fn attempt_split_ranges(
selection: &Selection,
row_range: Option<&Range<u64>>,
) -> Option<Vec<Range<u64>>> {
let Selection::IncludeByIndex(buffer) = selection else {
return None;
};
if row_range.is_some() {
return None;
}
let indices = buffer.as_slice();
if indices.is_empty() {
return Some(Vec::new());
}
debug_assert!(indices.is_sorted());
let mut ranges = Vec::with_capacity((indices.len() as u64 / MAX_RANGE_SIZE) as usize);
let mut curr_start = indices[0];
let mut curr_end = indices[0] + 1;
for &idx in &indices[1..] {
let new_range_size = (idx + 1) - curr_start;
let gap = (idx + 1) - curr_end;
if new_range_size >= MAX_RANGE_SIZE {
if gap >= MIN_GAP_BETWEEN_RANGES {
ranges.push(curr_start..curr_end);
curr_start = idx;
curr_end = idx + 1;
} else {
return None;
}
} else {
curr_end = idx + 1;
}
}
ranges.push(curr_start..curr_end);
Some(ranges)
}