use std::sync::Arc;
use arcstr::ArcStr;
use grafeo_common::types::Value;
use crate::codec::{BitPackedInts, BitVector, DictionaryEncoding};
/// Physical encoding of a single column; each variant owns the buffer or codec
/// structure holding that column's rows.
///
/// The enum is `#[non_exhaustive]`, so matches outside the defining crate need
/// a wildcard arm.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum ColumnCodec {
/// Bit-packed unsigned integers; `get` surfaces each one as `Value::Int64`.
BitPacked(BitPackedInts),
/// Dictionary-encoded strings; `get` surfaces each one as `Value::String`.
Dict(DictionaryEncoding),
/// One bit per row; `get` surfaces each one as `Value::Bool`.
Bitmap(BitVector),
/// Fixed-width `i8` vectors stored back to back in one flat buffer.
Int8Vector {
/// Flat element buffer; row `i` occupies `data[i * dimensions..(i + 1) * dimensions]`.
data: Vec<i8>,
/// Number of elements per row. Zero makes the column report a length of 0.
dimensions: u16,
},
/// Plain `f64` values, one per row.
Float64(Vec<f64>),
/// Fixed-width `f32` vectors stored back to back in one flat buffer.
Float32Vector {
/// Flat element buffer; row `i` occupies `data[i * dimensions..(i + 1) * dimensions]`.
data: Vec<f32>,
/// Number of elements per row. Zero makes the column report a length of 0.
dimensions: u16,
},
/// Plain `i64` values, one per row.
RawI64(Vec<i64>),
}
impl ColumnCodec {
/// Decodes the row at `index` into a [`Value`].
///
/// Returns `None` when `index` is past the end of the column. For the two
/// vector variants it also returns `None` when `dimensions` is zero or when
/// the row's element range would overflow `usize`.
///
/// Variant-to-value mapping:
/// - `BitPacked` → `Value::Int64` (the stored `u64` is cast with `as i64`)
/// - `Dict` → `Value::String`
/// - `Bitmap` → `Value::Bool`
/// - `Int8Vector` → `Value::List` of `Value::Int64`
/// - `Float64` → `Value::Float64`
/// - `RawI64` → `Value::Int64`
/// - `Float32Vector` → `Value::Vector`
#[inline]
#[must_use]
pub fn get(&self, index: usize) -> Option<Value> {
    /// Element range of row `index` inside a flat buffer of `total` elements.
    fn row_span(index: usize, dimensions: u16, total: usize) -> Option<std::ops::Range<usize>> {
        let width = usize::from(dimensions);
        if width == 0 {
            return None;
        }
        let first = index.checked_mul(width)?;
        let past_last = first.checked_add(width)?;
        (past_last <= total).then_some(first..past_last)
    }

    match self {
        Self::BitPacked(packed) => {
            let raw = packed.get(index)?;
            #[allow(clippy::cast_possible_wrap)]
            let signed = raw as i64;
            Some(Value::Int64(signed))
        }
        Self::Dict(dict) => {
            let text = dict.get(index)?;
            Some(Value::String(ArcStr::from(text)))
        }
        Self::Bitmap(bits) => bits.get(index).map(Value::Bool),
        Self::Int8Vector { data, dimensions } => {
            let span = row_span(index, *dimensions, data.len())?;
            let widened: Vec<Value> = data[span]
                .iter()
                .map(|&element| Value::Int64(i64::from(element)))
                .collect();
            Some(Value::List(Arc::from(widened)))
        }
        Self::Float64(column) => column.get(index).copied().map(Value::Float64),
        Self::RawI64(column) => column.get(index).copied().map(Value::Int64),
        Self::Float32Vector { data, dimensions } => {
            let span = row_span(index, *dimensions, data.len())?;
            Some(Value::Vector(Arc::from(&data[span])))
        }
    }
}
/// Returns the undecoded `u64` stored at `index` of a `BitPacked` column.
///
/// Every other variant yields `None`, as does an out-of-range `index`.
#[inline]
#[must_use]
pub fn get_raw_u64(&self, index: usize) -> Option<u64> {
    if let Self::BitPacked(packed) = self {
        packed.get(index)
    } else {
        None
    }
}
/// Borrows row `index` of an `Int8Vector` column as a slice, without copying.
///
/// Returns `None` for any other variant, when `dimensions` is zero, when the
/// row's element range overflows `usize`, or when the row lies past the end
/// of the buffer.
#[must_use]
pub fn get_int8_vector(&self, index: usize) -> Option<&[i8]> {
    let Self::Int8Vector { data, dimensions } = self else {
        return None;
    };
    let width = usize::from(*dimensions);
    if width == 0 {
        return None;
    }
    let first = index.checked_mul(width)?;
    let past_last = first.checked_add(width)?;
    // `get` yields `None` when the range runs past the end of the buffer.
    data.get(first..past_last)
}
/// Number of rows in the column.
///
/// For the vector variants this is the count of complete rows
/// (`data.len() / dimensions`); a zero `dimensions` reports 0 rows.
#[must_use]
pub fn len(&self) -> usize {
    // Complete rows in a flat buffer of `elements` values.
    let complete_rows = |elements: usize, dimensions: u16| match usize::from(dimensions) {
        0 => 0,
        width => elements / width,
    };

    match self {
        Self::BitPacked(packed) => packed.len(),
        Self::Dict(dict) => dict.len(),
        Self::Bitmap(bits) => bits.len(),
        Self::Int8Vector { data, dimensions } => complete_rows(data.len(), *dimensions),
        Self::Float64(column) => column.len(),
        Self::Float32Vector { data, dimensions } => complete_rows(data.len(), *dimensions),
        Self::RawI64(column) => column.len(),
    }
}
/// Returns `true` when the column has no rows, i.e. when [`Self::len`] is 0.
#[must_use]
pub fn is_empty(&self) -> bool {
    matches!(self.len(), 0)
}
/// Returns the row indices, in ascending order, whose value equals `target`.
///
/// When the target's type matches the column's native type the comparison is
/// done on the encoded data. Any other combination decodes each row with
/// [`Self::get`] and compares the resulting [`Value`]s.
pub fn find_eq(&self, target: &Value) -> Vec<usize> {
    match (self, target) {
        (Self::BitPacked(packed), &Value::Int64(wanted)) => {
            // Stored values are unsigned, so a negative target cannot match.
            let Ok(wanted) = u64::try_from(wanted) else {
                return Vec::new();
            };
            (0..packed.len())
                .filter(|&row| packed.get(row) == Some(wanted))
                .collect()
        }
        (Self::Dict(dict), Value::String(text)) => {
            // A string with no dictionary code matches no row.
            let Some(code) = dict.encode(text.as_str()) else {
                return Vec::new();
            };
            dict.filter_by_code(|c| c == code)
        }
        (Self::Bitmap(bits), &Value::Bool(wanted)) => (0..bits.len())
            .filter(|&row| bits.get(row) == Some(wanted))
            .collect(),
        (Self::Float64(column), &Value::Float64(wanted)) => column
            .iter()
            .enumerate()
            .filter_map(|(row, &value)| (value == wanted).then_some(row))
            .collect(),
        (Self::RawI64(column), &Value::Int64(wanted)) => column
            .iter()
            .enumerate()
            .filter_map(|(row, &value)| (value == wanted).then_some(row))
            .collect(),
        // Generic path: decode every row and compare as `Value`.
        _ => (0..self.len())
            .filter(|&row| self.get(row).as_ref() == Some(target))
            .collect(),
    }
}
/// Returns the row indices, in ascending order, whose value lies between
/// `min` and `max`.
///
/// A bound of `None` leaves that side open. `min_inclusive` / `max_inclusive`
/// choose between `>=`/`<=` and `>`/`<` for the corresponding bound.
///
/// `BitPacked` and `RawI64` columns with `Int64` (or absent) bounds are
/// compared on the stored integers. Every other combination goes through
/// [`Self::find_in_range_fallback`].
///
/// For `BitPacked` columns the stored values are unsigned, so:
/// - a negative lower bound excludes nothing and is treated as no lower
///   bound, whether inclusive or exclusive (clamping it to 0 would wrongly
///   drop rows holding 0 when the bound is exclusive);
/// - a negative upper bound can match nothing and yields an empty result.
pub fn find_in_range(
    &self,
    min: Option<&Value>,
    max: Option<&Value>,
    min_inclusive: bool,
    max_inclusive: bool,
) -> Vec<usize> {
    /// Tests `value` against optional lower and upper bounds.
    fn in_bounds<T: PartialOrd + Copy>(
        value: T,
        lower: Option<T>,
        lower_inclusive: bool,
        upper: Option<T>,
        upper_inclusive: bool,
    ) -> bool {
        let above_min = match lower {
            Some(lo) if lower_inclusive => value >= lo,
            Some(lo) => value > lo,
            None => true,
        };
        let below_max = match upper {
            Some(hi) if upper_inclusive => value <= hi,
            Some(hi) => value < hi,
            None => true,
        };
        above_min && below_max
    }

    match self {
        Self::BitPacked(bp) => {
            let min_u64 = match min {
                // Negative lower bound: every unsigned value is above it.
                Some(&Value::Int64(v)) => u64::try_from(v).ok(),
                None => None,
                Some(_) => {
                    return self.find_in_range_fallback(min, max, min_inclusive, max_inclusive);
                }
            };
            let max_u64 = match max {
                Some(&Value::Int64(v)) => match u64::try_from(v) {
                    Ok(hi) => Some(hi),
                    // Negative upper bound: no unsigned value is below it.
                    Err(_) => return Vec::new(),
                },
                None => None,
                Some(_) => {
                    return self.find_in_range_fallback(min, max, min_inclusive, max_inclusive);
                }
            };
            (0..bp.len())
                .filter(|&i| {
                    bp.get(i).map_or(false, |v| {
                        in_bounds(v, min_u64, min_inclusive, max_u64, max_inclusive)
                    })
                })
                .collect()
        }
        Self::RawI64(values) => {
            let min_i64 = match min {
                Some(&Value::Int64(v)) => Some(v),
                None => None,
                Some(_) => {
                    return self.find_in_range_fallback(min, max, min_inclusive, max_inclusive);
                }
            };
            let max_i64 = match max {
                Some(&Value::Int64(v)) => Some(v),
                None => None,
                Some(_) => {
                    return self.find_in_range_fallback(min, max, min_inclusive, max_inclusive);
                }
            };
            values
                .iter()
                .enumerate()
                .filter(|&(_, &v)| in_bounds(v, min_i64, min_inclusive, max_i64, max_inclusive))
                .map(|(i, _)| i)
                .collect()
        }
        _ => self.find_in_range_fallback(min, max, min_inclusive, max_inclusive),
    }
}
/// Generic range scan: decodes every row with [`Self::get`] and compares it
/// against the bounds using `compare_values`.
///
/// A row is kept only when it decodes successfully and satisfies every bound
/// that is present. A row that `compare_values` cannot order against a bound
/// (result `None`) is rejected.
fn find_in_range_fallback(
    &self,
    min: Option<&Value>,
    max: Option<&Value>,
    min_inclusive: bool,
    max_inclusive: bool,
) -> Vec<usize> {
    use super::zone_map::compare_values;
    use std::cmp::Ordering;

    let clears_min = |candidate: &Value| match min {
        None => true,
        Some(bound) => match compare_values(candidate, bound) {
            Some(Ordering::Greater) => true,
            Some(Ordering::Equal) => min_inclusive,
            Some(Ordering::Less) | None => false,
        },
    };
    let clears_max = |candidate: &Value| match max {
        None => true,
        Some(bound) => match compare_values(candidate, bound) {
            Some(Ordering::Less) => true,
            Some(Ordering::Equal) => max_inclusive,
            Some(Ordering::Greater) | None => false,
        },
    };

    let mut hits = Vec::new();
    for row in 0..self.len() {
        if let Some(candidate) = self.get(row) {
            // The upper bound is only consulted once the lower bound passes.
            if clears_min(&candidate) && clears_max(&candidate) {
                hits.push(row);
            }
        }
    }
    hits
}
/// Appends the binary encoding of this column to `buf`.
///
/// Layout: a one-byte variant tag, then the variant's payload. All lengths
/// are written as little-endian `u32`, all multi-byte values little-endian.
///
/// | tag | variant         | payload                                                        |
/// |-----|-----------------|----------------------------------------------------------------|
/// | 0   | `BitPacked`     | bits-per-value byte, value count, word count, `u64` words      |
/// | 1   | `Dict`          | entry count, (byte length, UTF-8 bytes)*, code count, codes    |
/// | 2   | `Bitmap`        | bit length, word count, `u64` words                            |
/// | 3   | `Int8Vector`    | `u16` dimensions, element count, one byte per element          |
/// | 4   | `Float64`       | value count, `f64` values                                      |
/// | 5   | `Float32Vector` | `u16` dimensions, element count, `f32` elements                |
/// | 6   | `RawI64`        | value count, `i64` values                                      |
///
/// # Panics
///
/// Panics when any written length exceeds `u32::MAX` (see
/// `write_usize_as_u32`).
pub fn write_to(&self, buf: &mut Vec<u8>) {
    match self {
        Self::BitPacked(packed) => {
            buf.push(0);
            buf.push(packed.bits_per_value());
            write_usize_as_u32(buf, packed.len());
            let words = packed.data();
            write_usize_as_u32(buf, words.len());
            buf.extend(words.iter().flat_map(|word| word.to_le_bytes()));
        }
        Self::Dict(dict) => {
            buf.push(1);
            let entries = dict.dictionary();
            write_usize_as_u32(buf, entries.len());
            for entry in entries.iter() {
                let utf8 = entry.as_ref().as_bytes();
                write_usize_as_u32(buf, utf8.len());
                buf.extend_from_slice(utf8);
            }
            let codes = dict.codes();
            write_usize_as_u32(buf, codes.len());
            buf.extend(codes.iter().flat_map(|code| code.to_le_bytes()));
        }
        Self::Bitmap(bits) => {
            buf.push(2);
            write_usize_as_u32(buf, bits.len());
            let words = bits.data();
            write_usize_as_u32(buf, words.len());
            buf.extend(words.iter().flat_map(|word| word.to_le_bytes()));
        }
        Self::Int8Vector { data, dimensions } => {
            buf.push(3);
            buf.extend_from_slice(&dimensions.to_le_bytes());
            write_usize_as_u32(buf, data.len());
            // Each `i8` is stored as its single two's-complement byte.
            buf.extend(data.iter().map(|element| element.to_le_bytes()[0]));
        }
        Self::Float64(column) => {
            buf.push(4);
            write_usize_as_u32(buf, column.len());
            buf.extend(column.iter().flat_map(|value| value.to_le_bytes()));
        }
        Self::Float32Vector { data, dimensions } => {
            buf.push(5);
            buf.extend_from_slice(&dimensions.to_le_bytes());
            write_usize_as_u32(buf, data.len());
            buf.extend(data.iter().flat_map(|element| element.to_le_bytes()));
        }
        Self::RawI64(column) => {
            buf.push(6);
            write_usize_as_u32(buf, column.len());
            buf.extend(column.iter().flat_map(|value| value.to_le_bytes()));
        }
    }
}
/// Decodes one column from `data` starting at `*pos`, advancing `*pos` past
/// the bytes consumed.
///
/// The expected layout is the one produced by [`Self::write_to`]: a one-byte
/// variant tag followed by the variant's payload.
///
/// The input is treated as untrusted. Length prefixes are never used directly
/// as allocation sizes; each pre-allocation is capped by the number of
/// elements that could still fit in the remaining bytes, so a short buffer
/// with a huge length prefix cannot force a large allocation. All offset
/// arithmetic is overflow-checked.
///
/// # Errors
///
/// Returns a static message when the tag is unknown, when the buffer ends
/// before a field is complete, or when a dictionary entry is not valid UTF-8.
/// On error `*pos` may already have advanced past the fields read so far.
pub fn read_from(data: &[u8], pos: &mut usize) -> Result<Self, &'static str> {
    // Most `elem_size`-byte elements that can still follow offset `at`.
    let max_elems = |at: usize, elem_size: usize| data.len().saturating_sub(at) / elem_size;

    let discriminant = *data.get(*pos).ok_or("truncated codec discriminant")?;
    *pos += 1;
    match discriminant {
        0 => {
            let bits = *data.get(*pos).ok_or("truncated bits_per_value")?;
            *pos += 1;
            let count = read_u32_le(data, pos)? as usize;
            let data_len = read_u32_le(data, pos)? as usize;
            let mut words = Vec::with_capacity(data_len.min(max_elems(*pos, 8)));
            for _ in 0..data_len {
                words.push(read_u64_le(data, pos)?);
            }
            Ok(Self::BitPacked(BitPackedInts::from_raw_parts(
                words, bits, count,
            )))
        }
        1 => {
            let dict_len = read_u32_le(data, pos)? as usize;
            // Every entry carries at least its 4-byte length prefix.
            let mut entries: Vec<Arc<str>> =
                Vec::with_capacity(dict_len.min(max_elems(*pos, 4)));
            for _ in 0..dict_len {
                let slen = read_u32_le(data, pos)? as usize;
                let end = (*pos).checked_add(slen).ok_or("truncated dict string")?;
                let raw = data.get(*pos..end).ok_or("truncated dict string")?;
                let s = std::str::from_utf8(raw).map_err(|_| "invalid UTF-8 in dict")?;
                entries.push(Arc::from(s));
                *pos = end;
            }
            let codes_len = read_u32_le(data, pos)? as usize;
            let mut codes = Vec::with_capacity(codes_len.min(max_elems(*pos, 4)));
            for _ in 0..codes_len {
                codes.push(read_u32_le(data, pos)?);
            }
            Ok(Self::Dict(DictionaryEncoding::new(
                Arc::from(entries.into_boxed_slice()),
                codes,
            )))
        }
        2 => {
            let bit_len = read_u32_le(data, pos)? as usize;
            let data_len = read_u32_le(data, pos)? as usize;
            let mut words = Vec::with_capacity(data_len.min(max_elems(*pos, 8)));
            for _ in 0..data_len {
                words.push(read_u64_le(data, pos)?);
            }
            Ok(Self::Bitmap(BitVector::from_raw_parts(words, bit_len)))
        }
        3 => {
            let dimensions = read_u16_le(data, pos)?;
            let data_len = read_u32_le(data, pos)? as usize;
            let end = (*pos)
                .checked_add(data_len)
                .ok_or("truncated Int8Vector data")?;
            let bytes = data.get(*pos..end).ok_or("truncated Int8Vector data")?;
            let i8_data: Vec<i8> = bytes.iter().map(|&b| i8::from_le_bytes([b])).collect();
            *pos = end;
            Ok(Self::Int8Vector {
                data: i8_data,
                dimensions,
            })
        }
        4 => {
            let count = read_u32_le(data, pos)? as usize;
            let mut vec = Vec::with_capacity(count.min(max_elems(*pos, 8)));
            for _ in 0..count {
                vec.push(read_f64_le(data, pos)?);
            }
            Ok(Self::Float64(vec))
        }
        5 => {
            let dimensions = read_u16_le(data, pos)?;
            let data_len = read_u32_le(data, pos)? as usize;
            let byte_need = data_len
                .checked_mul(4)
                .ok_or("Float32Vector length overflow")?;
            let end = (*pos)
                .checked_add(byte_need)
                .ok_or("truncated Float32Vector data")?;
            if end > data.len() {
                return Err("truncated Float32Vector data");
            }
            // The payload is known to be present, so `data_len` is a safe capacity.
            let mut f32_data = Vec::with_capacity(data_len);
            for _ in 0..data_len {
                f32_data.push(read_f32_le(data, pos)?);
            }
            Ok(Self::Float32Vector {
                data: f32_data,
                dimensions,
            })
        }
        6 => {
            let count = read_u32_le(data, pos)? as usize;
            let mut vec = Vec::with_capacity(count.min(max_elems(*pos, 8)));
            for _ in 0..count {
                vec.push(read_i64_le(data, pos)?);
            }
            Ok(Self::RawI64(vec))
        }
        _ => Err("unknown codec discriminant"),
    }
}
/// Estimates the heap bytes held by the column's payload.
///
/// Counts element bytes by length (not capacity). For `Dict` this is the code
/// array plus the byte length of each dictionary string; per-entry pointer or
/// allocation overhead is not included.
#[must_use]
pub fn heap_bytes(&self) -> usize {
    use std::mem::size_of;

    match self {
        Self::BitPacked(packed) => packed.data().len() * size_of::<u64>(),
        Self::Dict(dict) => {
            let code_bytes = dict.codes().len() * size_of::<u32>();
            let string_bytes = dict
                .dictionary()
                .iter()
                .map(|entry| entry.len())
                .sum::<usize>();
            code_bytes + string_bytes
        }
        Self::Bitmap(bits) => bits.data().len() * size_of::<u64>(),
        // One byte per `i8` element.
        Self::Int8Vector { data, .. } => data.len(),
        Self::Float64(column) => column.len() * size_of::<f64>(),
        Self::Float32Vector { data, .. } => data.len() * size_of::<f32>(),
        Self::RawI64(column) => column.len() * size_of::<i64>(),
    }
}
}
/// Appends `v` to `buf` as a 4-byte little-endian `u32`.
///
/// # Panics
///
/// Panics when `v` does not fit in a `u32`.
fn write_usize_as_u32(buf: &mut Vec<u8>, v: usize) {
    let encoded: [u8; 4] = u32::try_from(v)
        .expect("value exceeds u32::MAX in compact codec serialization")
        .to_le_bytes();
    buf.extend_from_slice(&encoded);
}
/// Reads a little-endian `u16` at `*pos` and advances `*pos` by 2.
///
/// # Errors
///
/// Returns `"truncated u16"` when fewer than 2 bytes remain at `*pos`,
/// including when `*pos` is so large that `*pos + 2` would overflow. `*pos`
/// is left unchanged on error.
fn read_u16_le(data: &[u8], pos: &mut usize) -> Result<u16, &'static str> {
    let end = (*pos).checked_add(2).ok_or("truncated u16")?;
    let bytes = data.get(*pos..end).ok_or("truncated u16")?;
    let v = u16::from_le_bytes([bytes[0], bytes[1]]);
    *pos = end;
    Ok(v)
}
/// Reads a little-endian `u32` at `*pos` and advances `*pos` by 4.
///
/// # Errors
///
/// Returns `"truncated u32"` when fewer than 4 bytes remain at `*pos`,
/// including when `*pos` is so large that `*pos + 4` would overflow. `*pos`
/// is left unchanged on error.
fn read_u32_le(data: &[u8], pos: &mut usize) -> Result<u32, &'static str> {
    let end = (*pos).checked_add(4).ok_or("truncated u32")?;
    let bytes = data.get(*pos..end).ok_or("truncated u32")?;
    let v = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
    *pos = end;
    Ok(v)
}
/// Reads a little-endian `u64` at `*pos` and advances `*pos` by 8.
///
/// # Errors
///
/// Returns `"truncated u64"` when fewer than 8 bytes remain at `*pos`,
/// including when `*pos` is so large that `*pos + 8` would overflow. `*pos`
/// is left unchanged on error.
fn read_u64_le(data: &[u8], pos: &mut usize) -> Result<u64, &'static str> {
    let end = (*pos).checked_add(8).ok_or("truncated u64")?;
    let bytes = data.get(*pos..end).ok_or("truncated u64")?;
    // `bytes` is exactly 8 long here, so the copy cannot panic.
    let mut raw = [0u8; 8];
    raw.copy_from_slice(bytes);
    *pos = end;
    Ok(u64::from_le_bytes(raw))
}
/// Reads a little-endian `f64` at `*pos` and advances `*pos` by 8.
///
/// # Errors
///
/// Returns `"truncated f64"` when fewer than 8 bytes remain at `*pos`,
/// including when `*pos` is so large that `*pos + 8` would overflow. `*pos`
/// is left unchanged on error.
fn read_f64_le(data: &[u8], pos: &mut usize) -> Result<f64, &'static str> {
    let end = (*pos).checked_add(8).ok_or("truncated f64")?;
    let bytes = data.get(*pos..end).ok_or("truncated f64")?;
    // `bytes` is exactly 8 long here, so the copy cannot panic.
    let mut raw = [0u8; 8];
    raw.copy_from_slice(bytes);
    *pos = end;
    Ok(f64::from_le_bytes(raw))
}
/// Reads a little-endian `i64` at `*pos` and advances `*pos` by 8.
///
/// # Errors
///
/// Returns `"truncated i64"` when fewer than 8 bytes remain at `*pos`,
/// including when `*pos` is so large that `*pos + 8` would overflow. `*pos`
/// is left unchanged on error.
fn read_i64_le(data: &[u8], pos: &mut usize) -> Result<i64, &'static str> {
    let end = (*pos).checked_add(8).ok_or("truncated i64")?;
    let bytes = data.get(*pos..end).ok_or("truncated i64")?;
    // `bytes` is exactly 8 long here, so the copy cannot panic.
    let mut raw = [0u8; 8];
    raw.copy_from_slice(bytes);
    *pos = end;
    Ok(i64::from_le_bytes(raw))
}
/// Reads a little-endian `f32` at `*pos` and advances `*pos` by 4.
///
/// # Errors
///
/// Returns `"truncated f32"` when fewer than 4 bytes remain at `*pos`,
/// including when `*pos` is so large that `*pos + 4` would overflow. `*pos`
/// is left unchanged on error.
fn read_f32_le(data: &[u8], pos: &mut usize) -> Result<f32, &'static str> {
    let end = (*pos).checked_add(4).ok_or("truncated f32")?;
    let bytes = data.get(*pos..end).ok_or("truncated f32")?;
    // `bytes` is exactly 4 long here, so the copy cannot panic.
    let mut raw = [0u8; 4];
    raw.copy_from_slice(bytes);
    *pos = end;
    Ok(f32::from_le_bytes(raw))
}
#[cfg(test)]
#[allow(clippy::cast_possible_wrap)]
mod tests {
use super::*;
use crate::codec::{BitPackedInts, BitVector, DictionaryBuilder};
/// Packed integers come back from `get` as `Value::Int64`, in order.
#[test]
fn test_bitpacked_round_trip() {
    let raw = vec![0u64, 5, 10, 15, 3, 7];
    let col = ColumnCodec::BitPacked(BitPackedInts::pack(&raw));
    assert_eq!(col.len(), 6);
    assert!(!col.is_empty());
    for (row, want) in raw.iter().copied().enumerate() {
        assert_eq!(col.get(row).unwrap(), Value::Int64(want as i64));
    }
}
/// Dictionary rows decode back to the strings that were added, duplicates included.
#[test]
fn test_dict_round_trip() {
    let words = ["alpha", "beta", "alpha"];
    let mut builder = DictionaryBuilder::new();
    for word in words {
        builder.add(word);
    }
    let col = ColumnCodec::Dict(builder.build());
    assert_eq!(col.len(), 3);
    for (row, word) in words.into_iter().enumerate() {
        assert_eq!(col.get(row), Some(Value::String(ArcStr::from(word))));
    }
}
/// Bitmap rows decode back to the booleans they were built from.
#[test]
fn test_bitmap_round_trip() {
    let flags = [true, false, true, true, false];
    let col = ColumnCodec::Bitmap(BitVector::from_bools(&flags));
    assert_eq!(col.len(), 5);
    for (row, flag) in flags.into_iter().enumerate() {
        assert_eq!(col.get(row), Some(Value::Bool(flag)));
    }
}
/// Each `Int8Vector` row decodes to a `Value::List` of widened `Int64` elements.
#[test]
fn test_int8_vector_round_trip() {
    let col = ColumnCodec::Int8Vector {
        data: vec![1i8, 2, 3, -4, -5, -6],
        dimensions: 3,
    };
    assert_eq!(col.len(), 2);

    let as_list = |elements: [i64; 3]| {
        let items: Vec<Value> = elements.into_iter().map(Value::Int64).collect();
        Value::List(Arc::from(items))
    };
    assert_eq!(col.get(0).unwrap(), as_list([1, 2, 3]));
    assert_eq!(col.get(1).unwrap(), as_list([-4, -5, -6]));
}
/// `get_raw_u64` exposes packed values, and `None` past the end or on other codecs.
#[test]
fn test_get_raw_u64_on_bitpacked() {
    let col = ColumnCodec::BitPacked(BitPackedInts::pack(&[100u64, 200, 300]));
    let observed: Vec<Option<u64>> = (0..4).map(|row| col.get_raw_u64(row)).collect();
    assert_eq!(observed, vec![Some(100), Some(200), Some(300), None]);

    let bm_col = ColumnCodec::Bitmap(BitVector::from_bools(&[true]));
    assert_eq!(bm_col.get_raw_u64(0), None);
}
/// `get_int8_vector` borrows whole rows, and yields `None` past the end or on other codecs.
#[test]
fn test_get_int8_vector_slice() {
    let col = ColumnCodec::Int8Vector {
        data: vec![10i8, 20, 30, 40, 50, 60],
        dimensions: 3,
    };
    let first_row: &[i8] = &[10, 20, 30];
    let second_row: &[i8] = &[40, 50, 60];
    assert_eq!(col.get_int8_vector(0), Some(first_row));
    assert_eq!(col.get_int8_vector(1), Some(second_row));
    assert_eq!(col.get_int8_vector(2), None);

    let bp_col = ColumnCodec::BitPacked(BitPackedInts::pack(&[1u64]));
    assert_eq!(bp_col.get_int8_vector(0), None);
}
/// Every accessor reports `None` for a row index beyond the stored data.
#[test]
fn test_out_of_bounds_returns_none() {
    let packed = ColumnCodec::BitPacked(BitPackedInts::pack(&[1u64, 2, 3]));
    assert_eq!(packed.get(999), None);
    assert_eq!(packed.get_raw_u64(999), None);

    let bitmap = ColumnCodec::Bitmap(BitVector::from_bools(&[true]));
    assert_eq!(bitmap.get(5), None);

    let mut builder = DictionaryBuilder::new();
    builder.add("x");
    let dict = ColumnCodec::Dict(builder.build());
    assert_eq!(dict.get(10), None);

    let vectors = ColumnCodec::Int8Vector {
        data: vec![1, 2],
        dimensions: 2,
    };
    assert_eq!(vectors.get(1), None);
    assert_eq!(vectors.get_int8_vector(1), None);
}
/// `find_eq` on packed integers: repeated, single, absent and negative targets.
#[test]
fn test_find_eq_bitpacked() {
    let col = ColumnCodec::BitPacked(BitPackedInts::pack(&[0u64, 5, 10, 5, 3, 5]));
    let rows_equal_to = |needle: i64| col.find_eq(&Value::Int64(needle));

    assert_eq!(rows_equal_to(5), vec![1, 3, 5]);
    assert_eq!(rows_equal_to(0), vec![0]);
    assert_eq!(rows_equal_to(99), Vec::<usize>::new());
    assert_eq!(rows_equal_to(-1), Vec::<usize>::new());
}
/// `find_eq` on a dictionary column: repeated, single and unknown strings.
#[test]
fn test_find_eq_dict() {
    let mut builder = DictionaryBuilder::new();
    for name in ["Vincent", "Jules", "Vincent", "Mia", "Jules"] {
        builder.add(name);
    }
    let col = ColumnCodec::Dict(builder.build());

    let vincent_rows = col.find_eq(&Value::String("Vincent".into()));
    assert_eq!(vincent_rows, vec![0, 2]);

    let mia_rows = col.find_eq(&Value::String("Mia".into()));
    assert_eq!(mia_rows, vec![3]);

    let butch_rows = col.find_eq(&Value::String("Butch".into()));
    assert_eq!(butch_rows, Vec::<usize>::new());
}
/// `find_eq` on a bitmap returns the rows holding the requested boolean.
#[test]
fn test_find_eq_bitmap() {
    let col = ColumnCodec::Bitmap(BitVector::from_bools(&[true, false, true, true, false]));
    let set_rows = col.find_eq(&Value::Bool(true));
    let clear_rows = col.find_eq(&Value::Bool(false));
    assert_eq!(set_rows, vec![0, 2, 3]);
    assert_eq!(clear_rows, vec![1, 4]);
}
/// A string target against packed integers matches no row.
#[test]
fn test_find_eq_type_mismatch_uses_fallback() {
    let col = ColumnCodec::BitPacked(BitPackedInts::pack(&[1u64, 2, 3]));
    let hits = col.find_eq(&Value::String("hello".into()));
    assert_eq!(hits, Vec::<usize>::new());
}
/// A `Value::List` target is matched against decoded `Int8Vector` rows.
#[test]
fn test_find_eq_int8_vector_uses_fallback() {
    let col = ColumnCodec::Int8Vector {
        data: vec![1i8, 2, 3, 4, 5, 6],
        dimensions: 3,
    };
    let first_row: Vec<Value> = [1, 2, 3].into_iter().map(Value::Int64).collect();
    let hits = col.find_eq(&Value::List(Arc::from(first_row)));
    assert_eq!(hits, vec![0]);
}
/// With `dimensions == 0`, `get` yields `None` even though data is present.
#[test]
fn test_int8_vector_zero_dimensions_get() {
    let degenerate = ColumnCodec::Int8Vector {
        data: vec![1, 2, 3],
        dimensions: 0,
    };
    assert_eq!(degenerate.get(0), None);
}
/// With `dimensions == 0`, `get_int8_vector` yields `None` even though data is present.
#[test]
fn test_int8_vector_zero_dimensions_get_int8_vector() {
    let degenerate = ColumnCodec::Int8Vector {
        data: vec![1, 2, 3],
        dimensions: 0,
    };
    assert_eq!(degenerate.get_int8_vector(0), None);
}
/// With `dimensions == 0`, the column reports zero rows and is empty.
#[test]
fn test_int8_vector_zero_dimensions_len_and_is_empty() {
    let degenerate = ColumnCodec::Int8Vector {
        data: vec![1, 2, 3],
        dimensions: 0,
    };
    assert_eq!(degenerate.len(), 0);
    assert!(degenerate.is_empty());
}
/// A non-empty packed column reports a non-zero heap size.
#[test]
fn test_heap_bytes_bitpacked() {
    let col = ColumnCodec::BitPacked(BitPackedInts::pack(&[0u64, 5, 10, 15]));
    let reported = col.heap_bytes();
    assert!(reported > 0);
}
/// A non-empty dictionary column reports a non-zero heap size.
#[test]
fn test_heap_bytes_dict() {
    let mut builder = DictionaryBuilder::new();
    for city in ["Amsterdam", "Berlin", "Paris"] {
        builder.add(city);
    }
    let col = ColumnCodec::Dict(builder.build());
    let reported = col.heap_bytes();
    assert!(reported > 0);
}
/// A non-empty bitmap column reports a non-zero heap size.
#[test]
fn test_heap_bytes_bitmap() {
    let col = ColumnCodec::Bitmap(BitVector::from_bools(&[true, false, true, true, false]));
    let reported = col.heap_bytes();
    assert!(reported > 0);
}
/// An `Int8Vector` reports exactly one byte per stored element.
#[test]
fn test_heap_bytes_int8_vector() {
    let col = ColumnCodec::Int8Vector {
        data: vec![1i8, 2, 3, 4, 5, 6],
        dimensions: 3,
    };
    assert_eq!(col.heap_bytes(), 6);
}
/// Inclusive bounds `[3, 6]` over the values 0..10 keep both endpoints.
#[test]
fn test_find_in_range_bitpacked_inclusive() {
    let ascending: Vec<u64> = (0..10).collect();
    let col = ColumnCodec::BitPacked(BitPackedInts::pack(&ascending));
    let (lo, hi) = (Value::Int64(3), Value::Int64(6));
    let hits = col.find_in_range(Some(&lo), Some(&hi), true, true);
    assert_eq!(hits, vec![3, 4, 5, 6]);
}
/// Exclusive bounds `(3, 6)` over the values 0..10 drop both endpoints.
#[test]
fn test_find_in_range_bitpacked_exclusive() {
    let ascending: Vec<u64> = (0..10).collect();
    let col = ColumnCodec::BitPacked(BitPackedInts::pack(&ascending));
    let (lo, hi) = (Value::Int64(3), Value::Int64(6));
    let hits = col.find_in_range(Some(&lo), Some(&hi), false, false);
    assert_eq!(hits, vec![4, 5]);
}
/// A missing bound leaves that side of the range open.
#[test]
fn test_find_in_range_bitpacked_open_ended() {
    let ascending: Vec<u64> = (0..10).collect();
    let col = ColumnCodec::BitPacked(BitPackedInts::pack(&ascending));

    let above_seven = col.find_in_range(Some(&Value::Int64(7)), None, false, false);
    assert_eq!(above_seven, vec![8, 9]);

    let up_to_two = col.find_in_range(None, Some(&Value::Int64(2)), false, true);
    assert_eq!(up_to_two, vec![0, 1, 2]);
}
/// String bounds on a dictionary column select rows by string ordering.
#[test]
fn test_find_in_range_fallback_for_dict() {
    let mut builder = DictionaryBuilder::new();
    for city in ["Amsterdam", "Berlin", "Paris", "Prague"] {
        builder.add(city);
    }
    let col = ColumnCodec::Dict(builder.build());

    let lo = Value::String("Berlin".into());
    let hi = Value::String("Prague".into());
    let hits = col.find_in_range(Some(&lo), Some(&hi), true, true);
    assert_eq!(hits, vec![1, 2, 3]);
}
/// A negative upper bound matches nothing in an unsigned packed column.
#[test]
fn test_find_in_range_negative_max() {
    let ascending: Vec<u64> = (0..10).collect();
    let col = ColumnCodec::BitPacked(BitPackedInts::pack(&ascending));
    let hits = col.find_in_range(None, Some(&Value::Int64(-1)), false, true);
    assert!(hits.is_empty());
}
/// A negative inclusive lower bound keeps every row of an unsigned packed column.
#[test]
fn test_find_in_range_negative_min() {
    let ascending: Vec<u64> = (0..5).collect();
    let col = ColumnCodec::BitPacked(BitPackedInts::pack(&ascending));
    let hits = col.find_in_range(Some(&Value::Int64(-10)), None, true, true);
    assert_eq!(hits, vec![0, 1, 2, 3, 4]);
}
/// String bounds against packed integers match no row.
#[test]
fn test_find_in_range_type_mismatch_uses_fallback() {
    let col = ColumnCodec::BitPacked(BitPackedInts::pack(&[1u64, 2, 3]));
    let lo = Value::String("a".into());
    let hi = Value::String("z".into());
    let hits = col.find_in_range(Some(&lo), Some(&hi), true, true);
    assert!(hits.is_empty());
}
/// Integer bounds against `Int8Vector` rows (decoded as lists) match no row.
#[test]
fn test_find_in_range_int8_vector_uses_fallback() {
    let col = ColumnCodec::Int8Vector {
        data: vec![1i8, 2, 3, 4, 5, 6],
        dimensions: 3,
    };
    let (lo, hi) = (Value::Int64(0), Value::Int64(10));
    let hits = col.find_in_range(Some(&lo), Some(&hi), true, true);
    assert!(hits.is_empty());
}
/// The first index past the end yields `None` for each codec exercised here.
#[test]
fn test_get_out_of_bounds_all_codecs() {
    let packed = ColumnCodec::BitPacked(BitPackedInts::pack(&[1u64, 2, 3]));
    assert_eq!(packed.get(3), None);

    let mut builder = DictionaryBuilder::new();
    builder.add("Alix");
    let dict = ColumnCodec::Dict(builder.build());
    assert_eq!(dict.get(1), None);

    let bitmap = ColumnCodec::Bitmap(BitVector::from_bools(&[true]));
    assert_eq!(bitmap.get(1), None);

    let vectors = ColumnCodec::Int8Vector {
        data: vec![1, 2, 3],
        dimensions: 3,
    };
    assert_eq!(vectors.get(1), None);
    assert_eq!(vectors.get_int8_vector(1), None);
}
/// Serializes a 100-row, 384-dimension `Int8Vector` and checks that sampled
/// rows survive `write_to` / `read_from` unchanged.
#[test]
fn test_column_int8_vector_roundtrip() {
    let dims: u16 = 384;
    let width = dims as usize;
    let rows = 100usize;
    #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
    let data: Vec<i8> = (0..rows * width)
        .map(|idx| (((idx * 7) % 251) as i64 - 120) as i8)
        .collect();
    let col = ColumnCodec::Int8Vector {
        data: data.clone(),
        dimensions: dims,
    };
    assert_eq!(col.len(), rows);

    let mut encoded = Vec::new();
    col.write_to(&mut encoded);
    let mut cursor = 0;
    let decoded = ColumnCodec::read_from(&encoded, &mut cursor).unwrap();
    assert_eq!(cursor, encoded.len(), "read_from should consume the full buffer");
    assert_eq!(decoded.len(), rows);

    for row in [0usize, 1, 50, 99] {
        let decoded_slice = decoded.get_int8_vector(row).unwrap();
        let start = row * width;
        assert_eq!(decoded_slice, &data[start..start + width]);

        let Value::List(items) = decoded.get(row).unwrap() else {
            panic!("expected Value::List for Int8Vector element");
        };
        assert_eq!(items.len(), width);
        assert_eq!(items[0], Value::Int64(i64::from(decoded_slice[0])));
    }
}
/// Out-of-range rows and a zero-dimension, empty column both decode to nothing.
#[test]
fn test_column_vector_oob_and_zero_dim() {
    let two_rows = ColumnCodec::Int8Vector {
        data: vec![1i8, 2, 3, 4, 5, 6],
        dimensions: 3,
    };
    assert_eq!(two_rows.len(), 2);
    for past_end in [2usize, 5] {
        assert!(two_rows.get(past_end).is_none());
    }
    for past_end in [2usize, 5] {
        assert!(two_rows.get_int8_vector(past_end).is_none());
    }

    let zero = ColumnCodec::Int8Vector {
        data: Vec::new(),
        dimensions: 0,
    };
    assert_eq!(zero.len(), 0);
    assert!(zero.is_empty());
    assert!(zero.get(0).is_none());
    assert!(zero.get_int8_vector(0).is_none());
}
/// Integer bounds on a string dictionary column select nothing.
#[test]
fn test_find_in_range_incompatible_types() {
    let mut builder = DictionaryBuilder::new();
    for city in ["Amsterdam", "Berlin", "Paris", "Prague", "Barcelona"] {
        builder.add(city);
    }
    let col = ColumnCodec::Dict(builder.build());

    let (lo, hi) = (Value::Int64(0), Value::Int64(100));
    let hits = col.find_in_range(Some(&lo), Some(&hi), true, true);
    assert!(
        hits.is_empty(),
        "Int64 bounds on a Dict column should yield no matches"
    );
}
/// `read_from` rejects empty, truncated and malformed buffers instead of panicking.
#[test]
fn test_column_serde_truncated_buffer() {
    let col = ColumnCodec::BitPacked(BitPackedInts::pack(&[1u64, 2, 3, 4, 5]));
    let mut encoded = Vec::new();
    col.write_to(&mut encoded);
    assert!(encoded.len() > 4);

    // Decodes from offset 0 and reports whether decoding failed.
    let fails = |bytes: &[u8]| {
        let mut cursor = 0;
        ColumnCodec::read_from(bytes, &mut cursor).is_err()
    };

    assert!(fails(&[]));
    assert!(fails(&encoded[..1]));
    assert!(fails(&encoded[..3]));
    assert!(fails(&encoded[..encoded.len() - 1]));
    assert!(fails(&[0xFFu8]));

    // Int8Vector header announcing 4 payload bytes, followed by only 2.
    let mut bad = vec![3u8];
    bad.extend_from_slice(&2u16.to_le_bytes());
    bad.extend_from_slice(&4u32.to_le_bytes());
    bad.extend_from_slice(&[0u8, 0u8]);
    assert!(fails(&bad));
}
/// A packed column survives `write_to` / `read_from` with identical rows.
#[test]
fn test_write_read_round_trip_bitpacked() {
    let col = ColumnCodec::BitPacked(BitPackedInts::pack(&[3u64, 7, 12, 5]));
    let mut encoded = Vec::new();
    col.write_to(&mut encoded);

    let mut cursor = 0;
    let decoded = ColumnCodec::read_from(&encoded, &mut cursor).unwrap();
    assert_eq!(cursor, encoded.len(), "read should consume entire buffer");
    assert_eq!(decoded.len(), 4);
    (0..4).for_each(|row| assert_eq!(decoded.get(row), col.get(row)));
}
/// A dictionary column survives `write_to` / `read_from` with identical rows.
#[test]
fn test_write_read_round_trip_dict() {
    let mut builder = DictionaryBuilder::new();
    for city in ["Amsterdam", "Berlin", "Amsterdam", "Paris"] {
        builder.add(city);
    }
    let col = ColumnCodec::Dict(builder.build());
    let mut encoded = Vec::new();
    col.write_to(&mut encoded);

    let mut cursor = 0;
    let decoded = ColumnCodec::read_from(&encoded, &mut cursor).unwrap();
    assert_eq!(cursor, encoded.len());
    assert_eq!(decoded.len(), 4);
    (0..4).for_each(|row| assert_eq!(decoded.get(row), col.get(row)));
}
/// A bitmap column survives `write_to` / `read_from` with identical rows.
#[test]
fn test_write_read_round_trip_bitmap() {
    let flags = [true, false, true, true, false, false, true];
    let col = ColumnCodec::Bitmap(BitVector::from_bools(&flags));
    let mut encoded = Vec::new();
    col.write_to(&mut encoded);

    let mut cursor = 0;
    let decoded = ColumnCodec::read_from(&encoded, &mut cursor).unwrap();
    assert_eq!(cursor, encoded.len());
    assert_eq!(decoded.len(), 7);
    (0..7).for_each(|row| assert_eq!(decoded.get(row), col.get(row)));
}
/// An `Int8Vector` column survives `write_to` / `read_from`, negative elements included.
#[test]
fn test_write_read_round_trip_int8_vector() {
    let col = ColumnCodec::Int8Vector {
        data: vec![1, -2, 3, -4, 5, -6, 7, -8],
        dimensions: 4,
    };
    let mut encoded = Vec::new();
    col.write_to(&mut encoded);

    let mut cursor = 0;
    let decoded = ColumnCodec::read_from(&encoded, &mut cursor).unwrap();
    assert_eq!(cursor, encoded.len());
    assert_eq!(decoded.len(), 2);

    let first_row: &[i8] = &[1, -2, 3, -4];
    let second_row: &[i8] = &[5, -6, 7, -8];
    assert_eq!(decoded.get_int8_vector(0), Some(first_row));
    assert_eq!(decoded.get_int8_vector(1), Some(second_row));
}
/// An empty buffer fails on the missing variant tag.
#[test]
fn test_read_from_empty_buffer_errors() {
    let mut cursor = 0;
    let outcome = ColumnCodec::read_from(&[], &mut cursor);
    assert_eq!(outcome.unwrap_err(), "truncated codec discriminant");
}
/// A tag outside the known set is rejected.
#[test]
fn test_read_from_unknown_discriminant_errors() {
    let encoded = [99u8];
    let mut cursor = 0;
    let outcome = ColumnCodec::read_from(&encoded, &mut cursor);
    assert_eq!(outcome.unwrap_err(), "unknown codec discriminant");
}
/// A `BitPacked` tag with nothing after it fails on the bits-per-value byte.
#[test]
fn test_read_from_truncated_bitpacked_bits() {
    let encoded = [0u8];
    let mut cursor = 0;
    let outcome = ColumnCodec::read_from(&encoded, &mut cursor);
    assert_eq!(outcome.unwrap_err(), "truncated bits_per_value");
}
/// A `BitPacked` header cut off inside the value count fails reading that `u32`.
#[test]
fn test_read_from_truncated_bitpacked_count() {
    // tag, bits-per-value, then only two of the count's four bytes
    let encoded = [0u8, 4, 0, 0];
    let mut cursor = 0;
    let outcome = ColumnCodec::read_from(&encoded, &mut cursor);
    assert_eq!(outcome.unwrap_err(), "truncated u32");
}
/// A `BitPacked` header announcing two words with none present fails reading a `u64`.
#[test]
fn test_read_from_truncated_bitpacked_words() {
    let mut encoded = vec![0u8, 4];
    // value count
    encoded.extend_from_slice(&1u32.to_le_bytes());
    // word count, with no words following
    encoded.extend_from_slice(&2u32.to_le_bytes());

    let mut cursor = 0;
    let outcome = ColumnCodec::read_from(&encoded, &mut cursor);
    assert_eq!(outcome.unwrap_err(), "truncated u64");
}
/// A dictionary entry announcing 5 bytes but carrying 3 is reported as truncated.
#[test]
fn test_read_from_truncated_dict_string() {
    let mut encoded = vec![1u8];
    // one dictionary entry
    encoded.extend_from_slice(&1u32.to_le_bytes());
    // declared entry length
    encoded.extend_from_slice(&5u32.to_le_bytes());
    // actual entry bytes, two short
    encoded.extend_from_slice(b"abc");

    let mut cursor = 0;
    let outcome = ColumnCodec::read_from(&encoded, &mut cursor);
    assert_eq!(outcome.unwrap_err(), "truncated dict string");
}
/// A dictionary entry whose bytes are not valid UTF-8 is rejected.
#[test]
fn test_read_from_invalid_utf8_in_dict() {
    let mut encoded = vec![1u8];
    // one dictionary entry
    encoded.extend_from_slice(&1u32.to_le_bytes());
    // declared entry length
    encoded.extend_from_slice(&2u32.to_le_bytes());
    // two bytes that do not form valid UTF-8
    encoded.extend_from_slice(&[0xFF, 0xFE]);

    let mut cursor = 0;
    let outcome = ColumnCodec::read_from(&encoded, &mut cursor);
    assert_eq!(outcome.unwrap_err(), "invalid UTF-8 in dict");
}
/// A `Bitmap` header announcing one word with none present fails reading a `u64`.
#[test]
fn test_read_from_truncated_bitmap_words() {
    let mut encoded = vec![2u8];
    // bit length
    encoded.extend_from_slice(&64u32.to_le_bytes());
    // word count, with no words following
    encoded.extend_from_slice(&1u32.to_le_bytes());

    let mut cursor = 0;
    let outcome = ColumnCodec::read_from(&encoded, &mut cursor);
    assert_eq!(outcome.unwrap_err(), "truncated u64");
}
/// An `Int8Vector` header cut off inside `dimensions` fails reading that `u16`.
#[test]
fn test_read_from_truncated_int8_vector_dimensions() {
    // tag plus only one of the two dimension bytes
    let encoded = [3u8, 0];
    let mut cursor = 0;
    let outcome = ColumnCodec::read_from(&encoded, &mut cursor);
    assert_eq!(outcome.unwrap_err(), "truncated u16");
}
/// An `Int8Vector` announcing 4 elements but carrying 2 is reported as truncated.
#[test]
fn test_read_from_truncated_int8_vector_data() {
    let mut encoded = vec![3u8];
    // dimensions
    encoded.extend_from_slice(&2u16.to_le_bytes());
    // declared element count
    encoded.extend_from_slice(&4u32.to_le_bytes());
    // actual elements, two short
    encoded.extend_from_slice(&[10u8, 20]);

    let mut cursor = 0;
    let outcome = ColumnCodec::read_from(&encoded, &mut cursor);
    assert_eq!(outcome.unwrap_err(), "truncated Int8Vector data");
}
/// An empty packed column serializes and decodes back to an empty column.
#[test]
fn test_empty_bitpacked_round_trip() {
    let col = ColumnCodec::BitPacked(BitPackedInts::pack(&[]));
    assert!(col.is_empty());
    assert_eq!(col.len(), 0);

    let mut encoded = Vec::new();
    col.write_to(&mut encoded);
    let mut cursor = 0;
    let decoded = ColumnCodec::read_from(&encoded, &mut cursor).unwrap();
    assert_eq!(cursor, encoded.len());
    assert!(decoded.is_empty());
}
/// An empty dictionary column serializes and decodes back to an empty column.
#[test]
fn test_empty_dict_round_trip() {
    let col = ColumnCodec::Dict(DictionaryBuilder::new().build());
    assert!(col.is_empty());

    let mut encoded = Vec::new();
    col.write_to(&mut encoded);
    let mut cursor = 0;
    let decoded = ColumnCodec::read_from(&encoded, &mut cursor).unwrap();
    assert_eq!(cursor, encoded.len());
    assert!(decoded.is_empty());
}
/// An empty bitmap column serializes and decodes back to an empty column.
#[test]
fn test_empty_bitmap_round_trip() {
    let col = ColumnCodec::Bitmap(BitVector::from_bools(&[]));
    assert!(col.is_empty());

    let mut encoded = Vec::new();
    col.write_to(&mut encoded);
    let mut cursor = 0;
    let decoded = ColumnCodec::read_from(&encoded, &mut cursor).unwrap();
    assert_eq!(cursor, encoded.len());
    assert!(decoded.is_empty());
}
/// An `Int8Vector` with no data (but non-zero dimensions) round-trips as empty.
#[test]
fn test_empty_int8_vector_round_trip() {
    let col = ColumnCodec::Int8Vector {
        data: Vec::new(),
        dimensions: 4,
    };
    assert!(col.is_empty());

    let mut encoded = Vec::new();
    col.write_to(&mut encoded);
    let mut cursor = 0;
    let decoded = ColumnCodec::read_from(&encoded, &mut cursor).unwrap();
    assert_eq!(cursor, encoded.len());
    assert!(decoded.is_empty());
}
/// The empty string is a valid dictionary entry, before and after serialization.
#[test]
fn test_empty_string_in_dict() {
    let mut builder = DictionaryBuilder::new();
    for entry in ["", "Alix", ""] {
        builder.add(entry);
    }
    let col = ColumnCodec::Dict(builder.build());

    let text = |s: &'static str| Some(Value::String(ArcStr::from(s)));
    assert_eq!(col.get(0), text(""));
    assert_eq!(col.get(1), text("Alix"));
    assert_eq!(col.get(2), text(""));

    let mut encoded = Vec::new();
    col.write_to(&mut encoded);
    let mut cursor = 0;
    let decoded = ColumnCodec::read_from(&encoded, &mut cursor).unwrap();
    assert_eq!(decoded.get(0), text(""));
    assert_eq!(decoded.get(2), text(""));
}
/// Bounds that land exactly on stored values: all four inclusive/exclusive combinations.
#[test]
fn test_find_in_range_exact_boundaries_inclusive_vs_exclusive() {
    let col = ColumnCodec::BitPacked(BitPackedInts::pack(&[10u64, 20, 30, 40, 50]));
    let lo = Value::Int64(20);
    let hi = Value::Int64(40);

    // (min_inclusive, max_inclusive, expected rows)
    let cases: [(bool, bool, Vec<usize>); 4] = [
        (true, true, vec![1, 2, 3]),
        (false, false, vec![2]),
        (true, false, vec![1, 2]),
        (false, true, vec![2, 3]),
    ];
    for (min_inclusive, max_inclusive, expected) in cases {
        let hits = col.find_in_range(Some(&lo), Some(&hi), min_inclusive, max_inclusive);
        assert_eq!(hits, expected);
    }
}
/// A `Float64` lower bound of 2.5 on packed integers keeps only the row holding 3.
#[test]
fn test_find_in_range_bitpacked_fallback_on_float_min() {
    let col = ColumnCodec::BitPacked(BitPackedInts::pack(&[1u64, 2, 3]));
    let lo = Value::Float64(2.5);
    let hits = col.find_in_range(Some(&lo), None, true, true);
    assert_eq!(hits, vec![2]);
}
/// A `Float64` upper bound of 2.5 on packed integers keeps the rows holding 1 and 2.
#[test]
fn test_find_in_range_bitpacked_fallback_on_float_max() {
    let col = ColumnCodec::BitPacked(BitPackedInts::pack(&[1u64, 2, 3]));
    let hi = Value::Float64(2.5);
    let hits = col.find_in_range(None, Some(&hi), true, true);
    assert_eq!(hits, vec![0, 1]);
}
/// With neither a lower nor an upper bound, every row index is returned.
#[test]
fn test_find_in_range_open_both_ends_returns_all() {
    let col = ColumnCodec::BitPacked(BitPackedInts::pack(&[1u64, 2, 3, 4, 5]));
    let every_row = col.find_in_range(None, None, true, true);
    assert_eq!(every_row, vec![0, 1, 2, 3, 4]);
}
/// Half-open string ranges on a dictionary column: an excluded lower bound
/// and, separately, an excluded upper bound.
#[test]
fn test_find_in_range_fallback_dict_exclusive() {
    let mut cities = DictionaryBuilder::new();
    for city in ["Amsterdam", "Berlin", "Paris", "Prague"] {
        cities.add(city);
    }
    let col = ColumnCodec::Dict(cities.build());

    // Strictly greater than "Berlin".
    let berlin = Value::String(ArcStr::from("Berlin"));
    let above_berlin = col.find_in_range(Some(&berlin), None, false, true);
    assert_eq!(above_berlin, vec![2, 3]);

    // Strictly less than "Prague".
    let prague = Value::String(ArcStr::from("Prague"));
    let below_prague = col.find_in_range(None, Some(&prague), true, false);
    assert_eq!(below_prague, vec![0, 1, 2]);
}
/// Integer bounds applied to a boolean bitmap column match no rows.
#[test]
fn test_find_in_range_fallback_mismatch_returns_none_for_row() {
    let flags = BitVector::from_bools(&[true, false, true]);
    let col = ColumnCodec::Bitmap(flags);
    let (low, high) = (Value::Int64(0), Value::Int64(5));
    let matches = col.find_in_range(Some(&low), Some(&high), true, true);
    assert!(matches.is_empty());
}
/// `get_raw_u64` only yields a value for `BitPacked` columns; every other
/// variant must return `None`, even for an in-range index.
///
/// One single-row column is built per non-bitpacked variant so the test
/// covers what its name promises.
#[test]
fn test_get_raw_u64_returns_none_for_all_non_bitpacked() {
    let mut b = DictionaryBuilder::new();
    b.add("x");
    let non_bitpacked = [
        ColumnCodec::Dict(b.build()),
        ColumnCodec::Bitmap(BitVector::from_bools(&[true])),
        ColumnCodec::Int8Vector {
            data: vec![1i8],
            dimensions: 1,
        },
        ColumnCodec::Float64(vec![1.0]),
        ColumnCodec::Float32Vector {
            data: vec![1.0f32],
            dimensions: 1,
        },
        ColumnCodec::RawI64(vec![1]),
    ];
    for col in &non_bitpacked {
        assert_eq!(
            col.get_raw_u64(0),
            None,
            "get_raw_u64 should be None for {:?}",
            col
        );
    }
}
/// `get_int8_vector` only yields a slice for `Int8Vector` columns; every
/// other variant must return `None`, even for an in-range index.
///
/// One single-row column is built per non-`Int8Vector` variant so the test
/// covers what its name promises.
#[test]
fn test_get_int8_vector_returns_none_for_all_non_vector() {
    let mut b = DictionaryBuilder::new();
    b.add("x");
    let non_int8_vector = [
        ColumnCodec::BitPacked(BitPackedInts::pack(&[1u64])),
        ColumnCodec::Dict(b.build()),
        ColumnCodec::Bitmap(BitVector::from_bools(&[true])),
        ColumnCodec::Float64(vec![1.0]),
        ColumnCodec::Float32Vector {
            data: vec![1.0f32],
            dimensions: 1,
        },
        ColumnCodec::RawI64(vec![1]),
    ];
    for col in &non_int8_vector {
        assert_eq!(
            col.get_int8_vector(0),
            None,
            "get_int8_vector should be None for {:?}",
            col
        );
    }
}
/// Empty bit-packed, dictionary and int8-vector columns all report zero heap
/// bytes.
#[test]
fn test_heap_bytes_empty_columns() {
    let empty_bitpacked = ColumnCodec::BitPacked(BitPackedInts::pack(&[]));
    assert_eq!(empty_bitpacked.heap_bytes(), 0);

    let empty_dict = ColumnCodec::Dict(DictionaryBuilder::new().build());
    assert_eq!(empty_dict.heap_bytes(), 0);

    let empty_vectors = ColumnCodec::Int8Vector {
        data: Vec::new(),
        dimensions: 4,
    };
    assert_eq!(empty_vectors.heap_bytes(), 0);
}
/// Searching a dictionary column for a string it never stored finds no rows.
#[test]
fn test_find_eq_dict_target_not_in_dictionary() {
    let mut cities = DictionaryBuilder::new();
    for city in ["Amsterdam", "Berlin"] {
        cities.add(city);
    }
    let col = ColumnCodec::Dict(cities.build());
    let absent = Value::String(ArcStr::from("Prague"));
    assert!(col.find_eq(&absent).is_empty());
}
/// `get_raw_u64` returns `None` on a dictionary column and on an int8-vector
/// column, even for an index that exists.
#[test]
fn test_get_raw_u64_on_dict_and_int8_vector_returns_none() {
    let mut names = DictionaryBuilder::new();
    names.add("Vincent");
    assert_eq!(ColumnCodec::Dict(names.build()).get_raw_u64(0), None);

    let vectors = ColumnCodec::Int8Vector {
        data: vec![1i8, 2, 3],
        dimensions: 3,
    };
    assert_eq!(vectors.get_raw_u64(0), None);
}
/// `get_int8_vector` returns `None` on a dictionary column and on a bitmap
/// column, even for an index that exists.
#[test]
fn test_get_int8_vector_on_dict_and_bitmap_returns_none() {
    let mut names = DictionaryBuilder::new();
    names.add("Jules");
    assert_eq!(ColumnCodec::Dict(names.build()).get_int8_vector(0), None);

    let flags = BitVector::from_bools(&[true, false]);
    assert_eq!(ColumnCodec::Bitmap(flags).get_int8_vector(0), None);
}
/// With both string bounds excluded, only the rows strictly between
/// "Amsterdam" and "Prague" are returned.
#[test]
fn test_find_in_range_dict_exclusive_bounds() {
    let mut cities = DictionaryBuilder::new();
    for city in ["Amsterdam", "Berlin", "Paris", "Prague"] {
        cities.add(city);
    }
    let col = ColumnCodec::Dict(cities.build());

    let lower = Value::String(ArcStr::from("Amsterdam"));
    let upper = Value::String(ArcStr::from("Prague"));
    let strictly_between = col.find_in_range(Some(&lower), Some(&upper), false, false);
    assert_eq!(strictly_between, vec![1, 2]);
}
/// One-sided inclusive string ranges on a dictionary column: first only an
/// upper bound, then only a lower bound.
#[test]
fn test_find_in_range_dict_open_bounds() {
    let mut cities = DictionaryBuilder::new();
    for city in ["Amsterdam", "Berlin", "Paris", "Prague"] {
        cities.add(city);
    }
    let col = ColumnCodec::Dict(cities.build());

    // Everything up to and including "Berlin".
    let berlin = Value::String(ArcStr::from("Berlin"));
    let up_to_berlin = col.find_in_range(None, Some(&berlin), true, true);
    assert_eq!(up_to_berlin, vec![0, 1]);

    // Everything from "Paris" onwards, inclusive.
    let paris = Value::String(ArcStr::from("Paris"));
    let from_paris = col.find_in_range(Some(&paris), None, true, true);
    assert_eq!(from_paris, vec![2, 3]);
}
/// Integer bounds applied to an int8-vector column match no rows, whether
/// only the lower or only the upper bound is supplied.
#[test]
fn test_find_in_range_fallback_uncomparable_skips_rows() {
    let col = ColumnCodec::Int8Vector {
        data: vec![1i8, 2, 3],
        dimensions: 3,
    };

    let lower_only = col.find_in_range(Some(&Value::Int64(0)), None, true, true);
    assert!(lower_only.is_empty());

    let upper_only = col.find_in_range(None, Some(&Value::Int64(10)), true, true);
    assert!(upper_only.is_empty());
}
/// Serializing a bit-packed column and reading it back consumes the whole
/// buffer and reproduces the same length and per-row values.
#[test]
fn test_write_to_read_from_bitpacked_round_trip() {
    let original = ColumnCodec::BitPacked(BitPackedInts::pack(&[0u64, 5, 10, 15, 3, 7]));

    let mut encoded = Vec::new();
    original.write_to(&mut encoded);

    let mut cursor = 0;
    let restored =
        ColumnCodec::read_from(&encoded, &mut cursor).expect("decode should succeed");
    // The reader must stop exactly at the end of what the writer produced.
    assert_eq!(cursor, encoded.len());
    assert_eq!(restored.len(), original.len());
    (0..original.len()).for_each(|row| assert_eq!(restored.get(row), original.get(row)));
}
/// Serializing a dictionary column (including a repeated entry) and reading
/// it back consumes the whole buffer and reproduces every row.
#[test]
fn test_write_to_read_from_dict_round_trip() {
    let mut names = DictionaryBuilder::new();
    for person in ["Vincent", "Jules", "Vincent", "Mia"] {
        names.add(person);
    }
    let original = ColumnCodec::Dict(names.build());

    let mut encoded = Vec::new();
    original.write_to(&mut encoded);

    let mut cursor = 0;
    let restored =
        ColumnCodec::read_from(&encoded, &mut cursor).expect("decode should succeed");
    // The reader must stop exactly at the end of what the writer produced.
    assert_eq!(cursor, encoded.len());
    assert_eq!(restored.len(), original.len());
    (0..original.len()).for_each(|row| assert_eq!(restored.get(row), original.get(row)));
}
/// Serializing a bitmap column and reading it back consumes the whole buffer
/// and reproduces every boolean.
#[test]
fn test_write_to_read_from_bitmap_round_trip() {
    let flags = BitVector::from_bools(&[true, false, true, true, false, false, true]);
    let original = ColumnCodec::Bitmap(flags);

    let mut encoded = Vec::new();
    original.write_to(&mut encoded);

    let mut cursor = 0;
    let restored =
        ColumnCodec::read_from(&encoded, &mut cursor).expect("decode should succeed");
    // The reader must stop exactly at the end of what the writer produced.
    assert_eq!(cursor, encoded.len());
    assert_eq!(restored.len(), original.len());
    (0..original.len()).for_each(|row| assert_eq!(restored.get(row), original.get(row)));
}
/// Serializing an int8-vector column (twelve values, four per row) and
/// reading it back consumes the whole buffer and reproduces every row slice.
#[test]
fn test_write_to_read_from_int8_vector_round_trip() {
    let original = ColumnCodec::Int8Vector {
        data: vec![1i8, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12],
        dimensions: 4,
    };

    let mut encoded = Vec::new();
    original.write_to(&mut encoded);

    let mut cursor = 0;
    let restored =
        ColumnCodec::read_from(&encoded, &mut cursor).expect("decode should succeed");
    // The reader must stop exactly at the end of what the writer produced.
    assert_eq!(cursor, encoded.len());
    assert_eq!(restored.len(), original.len());
    (0..original.len()).for_each(|row| {
        assert_eq!(restored.get_int8_vector(row), original.get_int8_vector(row));
    });
}
/// Decoding an empty buffer fails with the "truncated codec discriminant"
/// error.
#[test]
fn test_read_from_truncated_discriminant() {
    let empty: [u8; 0] = [];
    let mut cursor = 0;
    let failure = ColumnCodec::read_from(&empty, &mut cursor).unwrap_err();
    assert_eq!(failure, "truncated codec discriminant");
}
/// A leading byte of 42 is rejected with the "unknown codec discriminant"
/// error.
#[test]
fn test_read_from_unknown_discriminant() {
    let bytes = [42u8];
    let mut cursor = 0;
    let failure = ColumnCodec::read_from(&bytes, &mut cursor).unwrap_err();
    assert_eq!(failure, "unknown codec discriminant");
}
/// A buffer holding only the discriminant byte 0 fails with the
/// "truncated bits_per_value" error.
#[test]
fn test_read_from_truncated_bits_per_value() {
    let bytes = [0u8];
    let mut cursor = 0;
    let failure = ColumnCodec::read_from(&bytes, &mut cursor).unwrap_err();
    assert_eq!(failure, "truncated bits_per_value");
}
/// A bit-packed payload that ends three bytes into what should be a `u64`
/// fails with the "truncated u64" error.
#[test]
fn test_read_from_truncated_bitpacked_word() {
    let mut bytes = Vec::new();
    // Discriminant 0, then a second header byte (presumably bits-per-value).
    bytes.push(0u8);
    bytes.push(4);
    // Two little-endian u32 header fields, both set to 1
    // (presumably value count and word count; confirm against `write_to`).
    bytes.extend(1u32.to_le_bytes());
    bytes.extend(1u32.to_le_bytes());
    // Only three of the eight bytes a u64 needs.
    bytes.extend([0u8; 3]);

    let mut cursor = 0;
    let failure = ColumnCodec::read_from(&bytes, &mut cursor).unwrap_err();
    assert_eq!(failure, "truncated u64");
}
/// A dictionary payload followed by fewer string bytes than its header
/// announces fails with the "truncated dict string" error.
#[test]
fn test_read_from_dict_truncated_string() {
    let mut bytes = Vec::new();
    // Discriminant 1.
    bytes.push(1u8);
    // Two little-endian u32 fields, 1 then 5
    // (presumably entry count and string length; confirm against `write_to`).
    bytes.extend(1u32.to_le_bytes());
    bytes.extend(5u32.to_le_bytes());
    // Only two bytes of string data follow.
    bytes.extend_from_slice(b"ab");

    let mut cursor = 0;
    let failure = ColumnCodec::read_from(&bytes, &mut cursor).unwrap_err();
    assert_eq!(failure, "truncated dict string");
}
/// A dictionary payload whose string bytes are not valid UTF-8 fails with the
/// "invalid UTF-8 in dict" error.
#[test]
fn test_read_from_dict_invalid_utf8() {
    let mut bytes = Vec::new();
    // Discriminant 1.
    bytes.push(1u8);
    // Two little-endian u32 fields, 1 then 2
    // (presumably entry count and string length; confirm against `write_to`).
    bytes.extend(1u32.to_le_bytes());
    bytes.extend(2u32.to_le_bytes());
    // 0xFF 0xFE is not a valid UTF-8 sequence.
    bytes.extend([0xFFu8, 0xFE]);

    let mut cursor = 0;
    let failure = ColumnCodec::read_from(&bytes, &mut cursor).unwrap_err();
    assert_eq!(failure, "invalid UTF-8 in dict");
}
/// An int8-vector payload with fewer data bytes than its header announces
/// fails with the "truncated Int8Vector data" error.
#[test]
fn test_read_from_int8_vector_truncated_data() {
    let mut bytes = Vec::new();
    // Discriminant 3.
    bytes.push(3u8);
    // A little-endian u16 of 2 and u32 of 6
    // (presumably dimensions and data length; confirm against `write_to`).
    bytes.extend(2u16.to_le_bytes());
    bytes.extend(6u32.to_le_bytes());
    // Only three payload bytes follow.
    bytes.extend([1u8, 2, 3]);

    let mut cursor = 0;
    let failure = ColumnCodec::read_from(&bytes, &mut cursor).unwrap_err();
    assert_eq!(failure, "truncated Int8Vector data");
}
/// Discriminant 3 followed by a single byte (one short of a `u16`) fails with
/// the "truncated u16" error.
#[test]
fn test_read_from_int8_vector_truncated_dimensions() {
    let bytes = [3u8, 0];
    let mut cursor = 0;
    let failure = ColumnCodec::read_from(&bytes, &mut cursor).unwrap_err();
    assert_eq!(failure, "truncated u16");
}
/// Discriminant 2 followed by only two bytes (two short of a `u32`) fails
/// with the "truncated u32" error.
#[test]
fn test_read_from_bitmap_truncated() {
    let bytes = [2u8, 0, 0];
    let mut cursor = 0;
    let failure = ColumnCodec::read_from(&bytes, &mut cursor).unwrap_err();
    assert_eq!(failure, "truncated u32");
}
/// An empty bit-packed column round-trips to a column that is still empty.
#[test]
fn test_write_to_read_from_empty_bitpacked() {
    let original = ColumnCodec::BitPacked(BitPackedInts::pack(&[]));

    let mut encoded = Vec::new();
    original.write_to(&mut encoded);

    let mut cursor = 0;
    let restored =
        ColumnCodec::read_from(&encoded, &mut cursor).expect("decode should succeed");
    assert_eq!(restored.len(), 0);
    assert!(restored.is_empty());
}
/// An empty bitmap column round-trips to a column that is still empty.
#[test]
fn test_write_to_read_from_empty_bitmap() {
    let original = ColumnCodec::Bitmap(BitVector::from_bools(&[]));

    let mut encoded = Vec::new();
    original.write_to(&mut encoded);

    let mut cursor = 0;
    let restored =
        ColumnCodec::read_from(&encoded, &mut cursor).expect("decode should succeed");
    assert!(restored.is_empty());
}
/// An int8-vector column with no data (but non-zero dimensions) round-trips
/// to a column of length zero.
#[test]
fn test_write_to_read_from_empty_int8_vector() {
    let original = ColumnCodec::Int8Vector {
        data: Vec::new(),
        dimensions: 4,
    };

    let mut encoded = Vec::new();
    original.write_to(&mut encoded);

    let mut cursor = 0;
    let restored =
        ColumnCodec::read_from(&encoded, &mut cursor).expect("decode should succeed");
    assert_eq!(restored.len(), 0);
}
/// Each stored `i64`, including negatives and the type's extremes, is
/// returned as `Value::Int64`; an index past the end yields `None`.
#[test]
fn test_raw_i64_get_decodes_as_int64() {
    let samples = [-100, 0, 42, i64::MIN, i64::MAX];
    let col = ColumnCodec::RawI64(samples.to_vec());
    assert_eq!(col.len(), 5);

    for (row, &expected) in samples.iter().enumerate() {
        assert_eq!(col.get(row), Some(Value::Int64(expected)));
    }
    // One past the last row.
    assert_eq!(col.get(samples.len()), None);
}
/// Equality search on a raw `i64` column: repeated, single, absent and
/// wrong-typed targets.
#[test]
fn test_raw_i64_find_eq() {
    let col = ColumnCodec::RawI64(vec![-50, 10, -50, 20, 0, -50]);
    let rows_equal_to = |needle: Value| col.find_eq(&needle);

    assert_eq!(rows_equal_to(Value::Int64(-50)), vec![0, 2, 5]);
    assert_eq!(rows_equal_to(Value::Int64(10)), vec![1]);
    assert_eq!(rows_equal_to(Value::Int64(0)), vec![4]);
    // A value that was never stored.
    assert_eq!(rows_equal_to(Value::Int64(999)), Vec::<usize>::new());
    // A float target matches nothing, even though 10 is stored as an integer.
    assert_eq!(rows_equal_to(Value::Float64(10.0)), Vec::<usize>::new());
}
/// Range search on a raw `i64` column must order values as signed integers,
/// so negative values sort below zero.
#[test]
fn test_raw_i64_find_in_range_signed_ordering() {
    let col = ColumnCodec::RawI64(vec![-10, -5, 0, 5, 10, -100, 100]);
    let neg_five = Value::Int64(-5);
    let zero = Value::Int64(0);
    let five = Value::Int64(5);
    let ten = Value::Int64(10);

    // [-5, 5]
    let closed = col.find_in_range(Some(&neg_five), Some(&five), true, true);
    assert_eq!(closed, vec![1, 2, 3]);

    // (-5, 5)
    let open = col.find_in_range(Some(&neg_five), Some(&five), false, false);
    assert_eq!(open, vec![2]);

    // Strictly below zero: every negative value, including -100.
    let below_zero = col.find_in_range(None, Some(&zero), false, false);
    assert_eq!(below_zero, vec![0, 1, 5]);

    // 10 and above.
    let from_ten = col.find_in_range(Some(&ten), None, true, true);
    assert_eq!(from_ten, vec![4, 6]);
}
/// Serializing a raw `i64` column (negatives and extremes included) and
/// reading it back consumes the whole buffer and reproduces every row.
#[test]
fn test_write_to_read_from_raw_i64_round_trip() {
    let original = ColumnCodec::RawI64(vec![-42, 0, 1, i64::MIN, i64::MAX, -1_000_000_000]);

    let mut encoded = Vec::new();
    original.write_to(&mut encoded);

    let mut cursor = 0;
    let restored =
        ColumnCodec::read_from(&encoded, &mut cursor).expect("decode should succeed");
    // The reader must stop exactly at the end of what the writer produced.
    assert_eq!(cursor, encoded.len());
    assert_eq!(restored.len(), original.len());
    (0..original.len()).for_each(|row| assert_eq!(restored.get(row), original.get(row)));
}
/// An empty raw `i64` column round-trips to a column of length zero.
#[test]
fn test_write_to_read_from_empty_raw_i64() {
    let original = ColumnCodec::RawI64(Vec::new());

    let mut encoded = Vec::new();
    original.write_to(&mut encoded);

    let mut cursor = 0;
    let restored =
        ColumnCodec::read_from(&encoded, &mut cursor).expect("decode should succeed");
    assert_eq!(restored.len(), 0);
}
/// A raw `i64` column reports one `i64`'s worth of heap bytes per stored
/// value, and zero when empty.
#[test]
fn test_raw_i64_heap_bytes() {
    let three_values = ColumnCodec::RawI64(vec![-1, 2, -3]);
    let expected_bytes = std::mem::size_of::<i64>() * 3;
    assert_eq!(three_values.heap_bytes(), expected_bytes);

    let no_values = ColumnCodec::RawI64(Vec::new());
    assert_eq!(no_values.heap_bytes(), 0);
}
}