pub mod delta;
pub mod dictionary;
pub mod encoding;
pub mod fsst;
pub mod reader;
pub mod rle;
pub mod statistics;
pub mod types;
pub mod writer;
pub use reader::read_columnar;
pub use statistics::{
filter_table, read_columnar_with_columns, select_columns, split_into_row_groups, ColumnStats,
Predicate, RowGroup, RowGroupConfig, TableStats,
};
pub use types::{Column, ColumnData, ColumnTypeTag, ColumnarTable, EncodingType};
pub use writer::{write_columnar, write_columnar_with_options, ColumnarWriteOptions};
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_columnar_roundtrip_all_types() {
let dir = std::env::temp_dir().join("scirs2_columnar_test_roundtrip");
let _ = std::fs::create_dir_all(&dir);
let path = dir.join("all_types.scircol");
let table = ColumnarTable::from_columns(vec![
Column::float64("f64_col", vec![1.0, 2.5, std::f64::consts::PI, -0.5, 100.0]),
Column::int64("i64_col", vec![10, 20, 30, 40, 50]),
Column::string(
"str_col",
vec![
"hello".into(),
"world".into(),
"foo".into(),
"bar".into(),
"baz".into(),
],
),
Column::boolean("bool_col", vec![true, false, true, true, false]),
])
.expect("Failed to create table");
write_columnar(&path, &table).expect("Failed to write");
let loaded = read_columnar(&path).expect("Failed to read");
assert_eq!(loaded.num_rows(), 5);
assert_eq!(loaded.num_columns(), 4);
let f64_data = loaded.get_f64("f64_col").expect("Failed to get f64");
assert!((f64_data[2] - std::f64::consts::PI).abs() < 1e-10);
let i64_data = loaded.get_i64("i64_col").expect("Failed to get i64");
assert_eq!(i64_data[3], 40);
let str_data = loaded.get_str("str_col").expect("Failed to get str");
assert_eq!(str_data[1], "world");
let bool_data = loaded.get_bool("bool_col").expect("Failed to get bool");
assert!(!bool_data[1]);
assert!(bool_data[0]);
let _ = std::fs::remove_dir_all(&dir);
}
#[test]
fn test_columnar_rle_encoding() {
let dir = std::env::temp_dir().join("scirs2_columnar_test_rle");
let _ = std::fs::create_dir_all(&dir);
let path = dir.join("rle.scircol");
let values: Vec<f64> = [1.0f64; 100]
.into_iter()
.chain([2.0f64; 100])
.chain([3.0f64; 100])
.collect();
let table = ColumnarTable::from_columns(vec![Column::float64("runs", values.clone())])
.expect("Failed to create table");
write_columnar(&path, &table).expect("Failed to write");
let loaded = read_columnar(&path).expect("Failed to read");
let data = loaded.get_f64("runs").expect("Failed to get f64");
assert_eq!(data.len(), 300);
assert_eq!(data[0], 1.0);
assert_eq!(data[100], 2.0);
assert_eq!(data[200], 3.0);
let _ = std::fs::remove_dir_all(&dir);
}
#[test]
fn test_columnar_dictionary_encoding() {
let dir = std::env::temp_dir().join("scirs2_columnar_test_dict");
let _ = std::fs::create_dir_all(&dir);
let path = dir.join("dict.scircol");
let categories = ["red", "green", "blue"];
let mut values = Vec::new();
for i in 0..300 {
values.push(categories[i % 3].to_string());
}
let table = ColumnarTable::from_columns(vec![Column::string("color", values.clone())])
.expect("Failed to create table");
write_columnar(&path, &table).expect("Failed to write");
let loaded = read_columnar(&path).expect("Failed to read");
let data = loaded.get_str("color").expect("Failed to get str");
assert_eq!(data.len(), 300);
assert_eq!(data[0], "red");
assert_eq!(data[1], "green");
assert_eq!(data[2], "blue");
assert_eq!(data[3], "red");
let _ = std::fs::remove_dir_all(&dir);
}
#[test]
fn test_columnar_delta_encoding() {
let dir = std::env::temp_dir().join("scirs2_columnar_test_delta");
let _ = std::fs::create_dir_all(&dir);
let path = dir.join("delta.scircol");
let sorted_ints: Vec<i64> = (0..1000).collect();
let table = ColumnarTable::from_columns(vec![Column::int64("sorted", sorted_ints.clone())])
.expect("Failed to create table");
write_columnar(&path, &table).expect("Failed to write");
let loaded = read_columnar(&path).expect("Failed to read");
let data = loaded.get_i64("sorted").expect("Failed to get i64");
assert_eq!(data.len(), 1000);
for (i, &val) in data.iter().enumerate() {
assert_eq!(val, i as i64);
}
let _ = std::fs::remove_dir_all(&dir);
}
#[test]
fn test_columnar_forced_plain_encoding() {
let dir = std::env::temp_dir().join("scirs2_columnar_test_plain");
let _ = std::fs::create_dir_all(&dir);
let path = dir.join("plain.scircol");
let table = ColumnarTable::from_columns(vec![Column::float64(
"values",
vec![1.0, 2.0, 3.0, 4.0, 5.0],
)])
.expect("Failed to create table");
let options = ColumnarWriteOptions {
encoding: Some(EncodingType::Plain),
};
write_columnar_with_options(&path, &table, options).expect("Failed to write");
let loaded = read_columnar(&path).expect("Failed to read");
let data = loaded.get_f64("values").expect("Failed to get f64");
assert_eq!(data, &[1.0, 2.0, 3.0, 4.0, 5.0]);
let _ = std::fs::remove_dir_all(&dir);
}
#[test]
fn test_columnar_empty_table() {
let dir = std::env::temp_dir().join("scirs2_columnar_test_empty");
let _ = std::fs::create_dir_all(&dir);
let path = dir.join("empty.scircol");
let table = ColumnarTable::new();
write_columnar(&path, &table).expect("Failed to write");
let loaded = read_columnar(&path).expect("Failed to read");
assert_eq!(loaded.num_rows(), 0);
assert_eq!(loaded.num_columns(), 0);
let _ = std::fs::remove_dir_all(&dir);
}
#[test]
fn test_columnar_mismatched_lengths() {
let result = ColumnarTable::from_columns(vec![
Column::float64("a", vec![1.0, 2.0]),
Column::int64("b", vec![1, 2, 3]),
]);
assert!(result.is_err());
}
#[test]
fn test_columnar_duplicate_column_names() {
let result = ColumnarTable::from_columns(vec![
Column::float64("x", vec![1.0]),
Column::float64("x", vec![2.0]),
]);
assert!(result.is_err());
}
#[test]
fn test_columnar_column_access() {
let table = ColumnarTable::from_columns(vec![
Column::float64("a", vec![1.0]),
Column::int64("b", vec![2]),
])
.expect("Failed to create table");
assert_eq!(table.column_names(), vec!["a", "b"]);
assert!(table.column("a").is_ok());
assert!(table.column("nonexistent").is_err());
assert!(table.column_by_index(0).is_ok());
assert!(table.column_by_index(99).is_err());
}
#[test]
fn test_columnar_type_mismatch_access() {
let table = ColumnarTable::from_columns(vec![Column::float64("a", vec![1.0])])
.expect("Failed to create table");
assert!(table.get_f64("a").is_ok());
assert!(table.get_i64("a").is_err());
assert!(table.get_str("a").is_err());
assert!(table.get_bool("a").is_err());
}
#[test]
fn test_columnar_bool_packing() {
let dir = std::env::temp_dir().join("scirs2_columnar_test_bool");
let _ = std::fs::create_dir_all(&dir);
let path = dir.join("bools.scircol");
let bools = vec![
true, false, true, true, false, true, false, false, true, true, false, true, true,
];
let table = ColumnarTable::from_columns(vec![Column::boolean("flags", bools.clone())])
.expect("Failed to create table");
let options = ColumnarWriteOptions {
encoding: Some(EncodingType::Plain),
};
write_columnar_with_options(&path, &table, options).expect("Failed to write");
let loaded = read_columnar(&path).expect("Failed to read");
let data = loaded.get_bool("flags").expect("Failed to get bool");
assert_eq!(data, &bools);
let _ = std::fs::remove_dir_all(&dir);
}
}