use crate::{buffer::LanceBuffer, data::DataBlock, format::pb21::CompressiveEncoding};
use lance_core::Result;
/// Byte-size limit associated with mini-blocks: 8 KiB minus 6 bytes (8186).
///
/// NOTE(review): the reason for the 6-byte deduction is not visible in this
/// file; presumably it reserves room for per-chunk overhead — confirm against
/// the format specification.
pub const MAX_MINIBLOCK_BYTES: u64 = 8 * 1024 - 6;
/// Default value of the mini-block value limit, and also the largest value an
/// environment override is allowed to request.
const DEFAULT_MAX_MINIBLOCK_VALUES: u64 = 4096;

/// Resolves the mini-block value limit from the `LANCE_MINIBLOCK_MAX_VALUES`
/// environment variable.
///
/// * If the variable is unset, not valid Unicode, or does not parse as a
///   `u64`, [`DEFAULT_MAX_MINIBLOCK_VALUES`] is returned.
/// * Otherwise the parsed value is clamped into
///   `1..=DEFAULT_MAX_MINIBLOCK_VALUES`.
///
/// Leading and trailing whitespace around the number is ignored, so values
/// such as `" 256"` or `"256\n"` are honoured rather than silently replaced
/// by the default.
fn parse_max_miniblock_values() -> u64 {
    std::env::var("LANCE_MINIBLOCK_MAX_VALUES")
        .ok()
        // `str::parse` rejects surrounding whitespace, so trim it first.
        .and_then(|raw| raw.trim().parse::<u64>().ok())
        .map_or(DEFAULT_MAX_MINIBLOCK_VALUES, |requested| {
            requested.clamp(1, DEFAULT_MAX_MINIBLOCK_VALUES)
        })
}
/// Lazily-initialized mini-block value limit.
///
/// The value is produced by `parse_max_miniblock_values` on first access and
/// cached for the rest of the process, so later changes to the
/// `LANCE_MINIBLOCK_MAX_VALUES` environment variable have no effect.
pub static MAX_MINIBLOCK_VALUES: std::sync::LazyLock<u64> =
std::sync::LazyLock::new(parse_max_miniblock_values);
/// Output of a [`MiniBlockCompressor`]: compressed buffers plus per-chunk
/// metadata.
#[derive(Debug)]
pub struct MiniBlockCompressed {
/// The compressed data buffers.
pub data: Vec<LanceBuffer>,
/// Metadata for each chunk (see [`MiniBlockChunk`]).
pub chunks: Vec<MiniBlockChunk>,
/// Total number of values.
///
/// NOTE(review): presumably this is the `total_num_values` argument expected
/// by [`MiniBlockChunk::num_values`] — confirm against callers.
pub num_values: u64,
}
/// Metadata describing a single mini-block chunk.
#[derive(Debug)]
pub struct MiniBlockChunk {
    /// Size of each buffer belonging to this chunk.
    ///
    /// NOTE(review): presumably measured in bytes, one entry per buffer —
    /// confirm against the writer.
    pub buffer_sizes: Vec<u32>,
    /// Base-2 logarithm of the number of values in this chunk.
    ///
    /// A value of `0` is a sentinel: the chunk holds whatever remains after
    /// the preceding chunks (see [`MiniBlockChunk::num_values`]).
    pub log_num_values: u8,
}

impl MiniBlockChunk {
    /// Returns how many values this chunk contains.
    ///
    /// * `vals_in_prev_blocks` - number of values held by the chunks that come
    ///   before this one.
    /// * `total_num_values` - total number of values across all chunks.
    ///
    /// When `log_num_values` is non-zero the answer is `2^log_num_values` and
    /// both arguments are ignored. When it is zero the answer is
    /// `total_num_values - vals_in_prev_blocks`.
    pub fn num_values(&self, vals_in_prev_blocks: u64, total_num_values: u64) -> u64 {
        match self.log_num_values {
            // Sentinel: this chunk takes every value not already accounted for.
            0 => total_num_values - vals_in_prev_blocks,
            shift => 1_u64 << shift,
        }
    }
}
/// A compressor that turns a [`DataBlock`] into mini-block form.
///
/// Implementors must be `Debug`, `Send` and `Sync`.
pub trait MiniBlockCompressor: std::fmt::Debug + Send + Sync {
/// Compresses `page`, consuming it.
///
/// On success returns the compressed output together with a
/// [`CompressiveEncoding`] value.
///
/// NOTE(review): the `CompressiveEncoding` presumably describes how to
/// decode the output — confirm against the decoder.
fn compress(&self, page: DataBlock) -> Result<(MiniBlockCompressed, CompressiveEncoding)>;
}
#[cfg(test)]
mod tests {
    use serial_test::serial;

    use super::*;

    /// Environment variable consulted by `parse_max_miniblock_values`.
    const ENV_VAR: &str = "LANCE_MINIBLOCK_MAX_VALUES";

    /// Runs `parse_max_miniblock_values` with `ENV_VAR` set to `value`
    /// (or unset when `value` is `None`) and returns what it produced.
    ///
    /// The variable is removed again *before* this function returns, so a
    /// failing assertion in the caller cannot leave it set for later tests.
    fn parse_with_env(value: Option<&str>) -> u64 {
        // SAFETY: every test that touches `ENV_VAR` is marked `#[serial]`, so
        // these tests never mutate the environment concurrently with each
        // other. This assumes no other thread reads or writes the process
        // environment while a serial test is running.
        unsafe {
            match value {
                Some(raw) => std::env::set_var(ENV_VAR, raw),
                None => std::env::remove_var(ENV_VAR),
            }
        }

        let parsed = parse_max_miniblock_values();

        // SAFETY: same reasoning as for the mutation above.
        unsafe { std::env::remove_var(ENV_VAR) };

        parsed
    }

    #[test]
    #[serial]
    fn test_parse_default() {
        assert_eq!(parse_with_env(None), 4096);
    }

    #[test]
    #[serial]
    fn test_parse_custom_value() {
        assert_eq!(parse_with_env(Some("256")), 256);
    }

    #[test]
    #[serial]
    fn test_parse_clamps_zero_to_one() {
        assert_eq!(parse_with_env(Some("0")), 1);
    }

    #[test]
    #[serial]
    fn test_parse_clamps_above_max() {
        assert_eq!(parse_with_env(Some("99999")), DEFAULT_MAX_MINIBLOCK_VALUES);
    }

    #[test]
    #[serial]
    fn test_parse_invalid_falls_back_to_default() {
        assert_eq!(
            parse_with_env(Some("not_a_number")),
            DEFAULT_MAX_MINIBLOCK_VALUES
        );
    }
}