use std::sync::atomic::{AtomicBool, Ordering};
use rustc_hash::FxHashMap;
use crate::core::{Operator, Value};
pub const DEFAULT_SEGMENT_SIZE: usize = 1000;
#[derive(Debug, Clone)]
pub struct ZoneMapEntry {
pub segment_id: u32,
pub min_value: Option<Value>,
pub max_value: Option<Value>,
pub null_count: u32,
pub row_count: u32,
}
impl ZoneMapEntry {
pub fn new(segment_id: u32) -> Self {
Self {
segment_id,
min_value: None,
max_value: None,
null_count: 0,
row_count: 0,
}
}
pub fn update(&mut self, value: &Value) {
self.row_count += 1;
if value.is_null() {
self.null_count += 1;
return;
}
match &self.min_value {
None => self.min_value = Some(value.clone()),
Some(current_min) => {
if value < current_min {
self.min_value = Some(value.clone());
}
}
}
match &self.max_value {
None => self.max_value = Some(value.clone()),
Some(current_max) => {
if value > current_max {
self.max_value = Some(value.clone());
}
}
}
}
pub fn can_prune(&self, operator: Operator, value: &Value) -> bool {
if value.is_null() {
return match operator {
Operator::Eq => self.null_count == 0, _ => false, };
}
let (min, max) = match (&self.min_value, &self.max_value) {
(Some(min), Some(max)) => (min, max),
_ => return false, };
match operator {
Operator::Eq => value < min || value > max,
Operator::Ne => false,
Operator::Lt => min >= value,
Operator::Lte => min > value,
Operator::Gt => max <= value,
Operator::Gte => max < value,
Operator::Like
| Operator::In
| Operator::NotIn
| Operator::IsNull
| Operator::IsNotNull => false,
}
}
}
#[derive(Debug, Clone)]
pub struct ColumnZoneMap {
pub column_name: String,
pub segments: Vec<ZoneMapEntry>,
pub global_min: Option<Value>,
pub global_max: Option<Value>,
}
impl ColumnZoneMap {
pub fn new(column_name: impl Into<String>) -> Self {
Self {
column_name: column_name.into(),
segments: Vec::new(),
global_min: None,
global_max: None,
}
}
pub fn get_or_create_segment(&mut self, segment_id: u32) -> &mut ZoneMapEntry {
while self.segments.len() <= segment_id as usize {
self.segments
.push(ZoneMapEntry::new(self.segments.len() as u32));
}
&mut self.segments[segment_id as usize]
}
pub fn update_segment(&mut self, segment_id: u32, value: &Value) {
let entry = self.get_or_create_segment(segment_id);
entry.update(value);
if !value.is_null() {
match &self.global_min {
None => self.global_min = Some(value.clone()),
Some(current) if value < current => self.global_min = Some(value.clone()),
_ => {}
}
match &self.global_max {
None => self.global_max = Some(value.clone()),
Some(current) if value > current => self.global_max = Some(value.clone()),
_ => {}
}
}
}
pub fn get_unpruned_segments(&self, operator: Operator, value: &Value) -> Vec<u32> {
self.segments
.iter()
.filter(|entry| !entry.can_prune(operator, value))
.map(|entry| entry.segment_id)
.collect()
}
pub fn count_pruned_segments(&self, operator: Operator, value: &Value) -> usize {
self.segments
.iter()
.filter(|entry| entry.can_prune(operator, value))
.count()
}
}
#[derive(Debug)]
pub struct TableZoneMap {
pub columns: FxHashMap<String, ColumnZoneMap>,
pub segment_size: usize,
pub segment_count: u32,
pub stale: AtomicBool,
}
impl Clone for TableZoneMap {
fn clone(&self) -> Self {
Self {
columns: self.columns.clone(),
segment_size: self.segment_size,
segment_count: self.segment_count,
stale: AtomicBool::new(self.stale.load(Ordering::SeqCst)),
}
}
}
impl TableZoneMap {
pub fn new(segment_size: usize) -> Self {
Self {
columns: FxHashMap::default(),
segment_size,
segment_count: 0,
stale: AtomicBool::new(false),
}
}
pub fn segment_for_row(&self, row_index: usize) -> u32 {
(row_index / self.segment_size) as u32
}
pub fn update_row(&mut self, row_index: usize, columns: &[(String, Value)]) {
let segment_id = self.segment_for_row(row_index);
if segment_id >= self.segment_count {
self.segment_count = segment_id + 1;
}
for (col_name, value) in columns {
let col_map = self
.columns
.entry(col_name.clone())
.or_insert_with(|| ColumnZoneMap::new(col_name.clone()));
col_map.update_segment(segment_id, value);
}
}
pub fn mark_stale(&self) {
self.stale.store(true, Ordering::SeqCst);
}
pub fn is_stale(&self) -> bool {
self.stale.load(Ordering::SeqCst)
}
pub fn clear_stale(&self) {
self.stale.store(false, Ordering::SeqCst);
}
pub fn get_prune_stats(
&self,
column: &str,
operator: Operator,
value: &Value,
) -> Option<PruneStats> {
let col_map = self.columns.get(column)?;
let total = col_map.segments.len();
let pruned = col_map.count_pruned_segments(operator, value);
Some(PruneStats {
total_segments: total,
pruned_segments: pruned,
scanned_segments: total - pruned,
})
}
pub fn get_segments_to_scan(
&self,
column: &str,
operator: Operator,
value: &Value,
) -> Option<Vec<u32>> {
let col_map = self.columns.get(column)?;
Some(col_map.get_unpruned_segments(operator, value))
}
}
#[derive(Debug, Clone)]
pub struct PruneStats {
pub total_segments: usize,
pub pruned_segments: usize,
pub scanned_segments: usize,
}
impl std::fmt::Display for PruneStats {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let pct = if self.total_segments > 0 {
(self.pruned_segments as f64 / self.total_segments as f64) * 100.0
} else {
0.0
};
write!(
f,
"{}/{} segments scanned ({:.0}% pruned)",
self.scanned_segments, self.total_segments, pct
)
}
}
pub struct ZoneMapBuilder {
zone_map: TableZoneMap,
current_row: usize,
}
impl ZoneMapBuilder {
pub fn new(segment_size: usize) -> Self {
Self {
zone_map: TableZoneMap::new(segment_size),
current_row: 0,
}
}
pub fn add_row(&mut self, columns: &[(String, Value)]) {
self.zone_map.update_row(self.current_row, columns);
self.current_row += 1;
}
pub fn build(self) -> TableZoneMap {
self.zone_map
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_zone_map_entry_basic() {
let mut entry = ZoneMapEntry::new(0);
entry.update(&Value::Integer(10));
entry.update(&Value::Integer(20));
entry.update(&Value::Integer(15));
assert_eq!(entry.row_count, 3);
assert_eq!(entry.null_count, 0);
assert_eq!(entry.min_value, Some(Value::Integer(10)));
assert_eq!(entry.max_value, Some(Value::Integer(20)));
}
#[test]
fn test_zone_map_entry_with_nulls() {
use crate::core::DataType;
let mut entry = ZoneMapEntry::new(0);
entry.update(&Value::Integer(10));
entry.update(&Value::null(DataType::Integer));
entry.update(&Value::Integer(20));
assert_eq!(entry.row_count, 3);
assert_eq!(entry.null_count, 1);
assert_eq!(entry.min_value, Some(Value::Integer(10)));
assert_eq!(entry.max_value, Some(Value::Integer(20)));
}
#[test]
fn test_zone_map_pruning() {
let mut entry = ZoneMapEntry::new(0);
for i in 10..=20 {
entry.update(&Value::Integer(i));
}
assert!(entry.can_prune(Operator::Eq, &Value::Integer(5)));
assert!(entry.can_prune(Operator::Eq, &Value::Integer(25)));
assert!(!entry.can_prune(Operator::Eq, &Value::Integer(15)));
assert!(entry.can_prune(Operator::Lt, &Value::Integer(10)));
assert!(!entry.can_prune(Operator::Lt, &Value::Integer(15)));
assert!(entry.can_prune(Operator::Gt, &Value::Integer(20)));
assert!(!entry.can_prune(Operator::Gt, &Value::Integer(15)));
}
#[test]
fn test_column_zone_map() {
let mut col_map = ColumnZoneMap::new("id");
for i in 1..=10 {
col_map.update_segment(0, &Value::Integer(i));
}
for i in 11..=20 {
col_map.update_segment(1, &Value::Integer(i));
}
for i in 21..=30 {
col_map.update_segment(2, &Value::Integer(i));
}
assert_eq!(col_map.segments.len(), 3);
assert_eq!(col_map.global_min, Some(Value::Integer(1)));
assert_eq!(col_map.global_max, Some(Value::Integer(30)));
let unpruned = col_map.get_unpruned_segments(Operator::Gt, &Value::Integer(25));
assert_eq!(unpruned, vec![2]);
let unpruned = col_map.get_unpruned_segments(Operator::Eq, &Value::Integer(15));
assert_eq!(unpruned, vec![1]);
}
#[test]
fn test_table_zone_map() {
let mut table_map = TableZoneMap::new(10);
for i in 0..30 {
table_map.update_row(
i,
&[
("id".to_string(), Value::Integer(i as i64)),
("name".to_string(), Value::text(format!("row{}", i))),
],
);
}
assert_eq!(table_map.segment_count, 3);
let stats = table_map
.get_prune_stats("id", Operator::Gt, &Value::Integer(25))
.unwrap();
assert_eq!(stats.total_segments, 3);
assert_eq!(stats.pruned_segments, 2);
assert_eq!(stats.scanned_segments, 1);
}
#[test]
fn test_zone_map_builder() {
let mut builder = ZoneMapBuilder::new(5);
for i in 0..20 {
builder.add_row(&[("value".to_string(), Value::Integer(i))]);
}
let zone_map = builder.build();
assert_eq!(zone_map.segment_count, 4);
let col_map = zone_map.columns.get("value").unwrap();
assert_eq!(col_map.segments.len(), 4);
assert_eq!(col_map.segments[0].min_value, Some(Value::Integer(0)));
assert_eq!(col_map.segments[0].max_value, Some(Value::Integer(4)));
assert_eq!(col_map.segments[3].min_value, Some(Value::Integer(15)));
assert_eq!(col_map.segments[3].max_value, Some(Value::Integer(19)));
}
}