use std::collections::HashMap;
use sochdb_core::{Result, SochDBError, SochValue};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackedColumnType {
Bool,
Int64,
UInt64,
Float64,
Text,
Binary,
Null,
}
impl PackedColumnType {
#[inline]
pub fn from_byte(b: u8) -> Option<Self> {
match b {
0 => Some(Self::Null),
1 => Some(Self::Bool),
2 => Some(Self::Int64),
3 => Some(Self::UInt64),
4 => Some(Self::Float64),
5 => Some(Self::Text),
6 => Some(Self::Binary),
_ => None,
}
}
#[inline]
pub fn to_byte(self) -> u8 {
match self {
Self::Null => 0,
Self::Bool => 1,
Self::Int64 => 2,
Self::UInt64 => 3,
Self::Float64 => 4,
Self::Text => 5,
Self::Binary => 6,
}
}
}
#[derive(Debug, Clone)]
pub struct PackedColumnDef {
pub name: String,
pub col_type: PackedColumnType,
pub nullable: bool,
}
#[derive(Debug, Clone)]
pub struct PackedTableSchema {
pub name: String,
pub columns: Vec<PackedColumnDef>,
}
impl PackedTableSchema {
pub fn new(name: impl Into<String>, columns: Vec<PackedColumnDef>) -> Self {
Self {
name: name.into(),
columns,
}
}
#[inline]
pub fn column_index(&self, name: &str) -> Option<usize> {
self.columns.iter().position(|c| c.name == name)
}
#[inline]
pub fn column(&self, idx: usize) -> Option<&PackedColumnDef> {
self.columns.get(idx)
}
#[inline]
pub fn num_columns(&self) -> usize {
self.columns.len()
}
}
#[repr(C)]
pub struct PackedRow {
data: Vec<u8>,
num_cols: u16,
null_bitmap_size: usize,
}
impl PackedRow {
#[inline]
fn buffer_size(schema: &PackedTableSchema, values: &HashMap<String, SochValue>) -> usize {
let k = schema.columns.len();
let null_bitmap_size = k.div_ceil(8);
let offsets_size = k * 4;
let data_size: usize = schema
.columns
.iter()
.map(|col| Self::value_size(values.get(&col.name)))
.sum();
null_bitmap_size + offsets_size + data_size
}
#[inline]
fn value_size(value: Option<&SochValue>) -> usize {
match value {
None | Some(SochValue::Null) => 0,
Some(SochValue::Bool(_)) => 1,
Some(SochValue::Int(_) | SochValue::UInt(_) | SochValue::Float(_)) => 8,
Some(SochValue::Text(s)) => 4 + s.len(),
Some(SochValue::Binary(b)) => 4 + b.len(),
_ => 0, }
}
pub fn pack(schema: &PackedTableSchema, values: &HashMap<String, SochValue>) -> Self {
let k = schema.columns.len();
let null_bitmap_size = k.div_ceil(8);
let total_size = Self::buffer_size(schema, values);
let mut data = Vec::with_capacity(total_size);
let mut null_bits = vec![0u8; null_bitmap_size];
for (i, col) in schema.columns.iter().enumerate() {
match values.get(&col.name) {
None | Some(SochValue::Null) => {
null_bits[i / 8] |= 1 << (i % 8);
}
_ => {}
}
}
data.extend_from_slice(&null_bits);
let offsets_start = data.len();
data.resize(offsets_start + k * 4, 0);
let data_start = offsets_start + k * 4;
for (i, col) in schema.columns.iter().enumerate() {
let offset = (data.len() - data_start) as u32;
let offset_pos = offsets_start + i * 4;
data[offset_pos..offset_pos + 4].copy_from_slice(&offset.to_le_bytes());
if let Some(value) = values.get(&col.name) {
Self::write_value(&mut data, value);
}
}
Self {
data,
num_cols: k as u16,
null_bitmap_size,
}
}
#[inline]
pub fn pack_slice(schema: &PackedTableSchema, values: &[Option<&SochValue>]) -> Self {
let k = schema.columns.len();
debug_assert_eq!(
values.len(),
k,
"values slice must match schema column count"
);
let null_bitmap_size = k.div_ceil(8);
let total_size = Self::buffer_size_slice(schema, values);
if total_size <= 512 {
Self::pack_slice_small(schema, values, k, null_bitmap_size, total_size)
} else {
Self::pack_slice_large(schema, values, k, null_bitmap_size, total_size)
}
}
#[inline]
fn pack_slice_small(
_schema: &PackedTableSchema,
values: &[Option<&SochValue>],
k: usize,
null_bitmap_size: usize,
total_size: usize,
) -> Self {
let mut stack_buf = [0u8; 512];
let buf = &mut stack_buf[..total_size];
for (i, val) in values.iter().enumerate() {
match val {
None | Some(SochValue::Null) => {
buf[i / 8] |= 1 << (i % 8);
}
_ => {}
}
}
let offsets_start = null_bitmap_size;
let data_start = offsets_start + k * 4;
let mut data_pos = data_start;
for (i, val) in values.iter().enumerate() {
let offset = (data_pos - data_start) as u32;
let offset_pos = offsets_start + i * 4;
buf[offset_pos..offset_pos + 4].copy_from_slice(&offset.to_le_bytes());
if let Some(value) = val {
data_pos += Self::write_value_to_slice(&mut buf[data_pos..], value);
}
}
Self {
data: buf[..total_size].to_vec(),
num_cols: k as u16,
null_bitmap_size,
}
}
#[inline]
fn pack_slice_large(
_schema: &PackedTableSchema,
values: &[Option<&SochValue>],
k: usize,
null_bitmap_size: usize,
total_size: usize,
) -> Self {
let mut data = Vec::with_capacity(total_size);
let mut null_bits = vec![0u8; null_bitmap_size];
for (i, val) in values.iter().enumerate() {
match val {
None | Some(SochValue::Null) => {
null_bits[i / 8] |= 1 << (i % 8);
}
_ => {}
}
}
data.extend_from_slice(&null_bits);
let offsets_start = data.len();
data.resize(offsets_start + k * 4, 0);
let data_start = offsets_start + k * 4;
for (i, val) in values.iter().enumerate() {
let offset = (data.len() - data_start) as u32;
let offset_pos = offsets_start + i * 4;
data[offset_pos..offset_pos + 4].copy_from_slice(&offset.to_le_bytes());
if let Some(value) = val {
Self::write_value(&mut data, value);
}
}
Self {
data,
num_cols: k as u16,
null_bitmap_size,
}
}
#[inline]
fn write_value_to_slice(buf: &mut [u8], value: &SochValue) -> usize {
match value {
SochValue::Null => 0,
SochValue::Bool(b) => {
buf[0] = if *b { 1 } else { 0 };
1
}
SochValue::Int(i) => {
buf[..8].copy_from_slice(&i.to_le_bytes());
8
}
SochValue::UInt(u) => {
buf[..8].copy_from_slice(&u.to_le_bytes());
8
}
SochValue::Float(f) => {
buf[..8].copy_from_slice(&f.to_bits().to_le_bytes());
8
}
SochValue::Text(s) => {
let len = s.len() as u32;
buf[..4].copy_from_slice(&len.to_le_bytes());
buf[4..4 + s.len()].copy_from_slice(s.as_bytes());
4 + s.len()
}
SochValue::Binary(b) => {
let len = b.len() as u32;
buf[..4].copy_from_slice(&len.to_le_bytes());
buf[4..4 + b.len()].copy_from_slice(b);
4 + b.len()
}
_ => 0,
}
}
#[inline]
fn buffer_size_slice(schema: &PackedTableSchema, values: &[Option<&SochValue>]) -> usize {
let k = schema.columns.len();
let null_bitmap_size = k.div_ceil(8);
let offsets_size = k * 4;
let data_size: usize = values
.iter()
.map(|v| match v {
None | Some(SochValue::Null) => 0,
Some(SochValue::Bool(_)) => 1,
Some(SochValue::Int(_) | SochValue::UInt(_) | SochValue::Float(_)) => 8,
Some(SochValue::Text(s)) => 4 + s.len(),
Some(SochValue::Binary(b)) => 4 + b.len(),
_ => 0,
})
.sum();
null_bitmap_size + offsets_size + data_size
}
#[inline]
pub fn unpack_to_vec(&self, schema: &PackedTableSchema) -> Vec<SochValue> {
let k = schema.columns.len();
let mut result = Vec::with_capacity(k);
for (i, col) in schema.columns.iter().enumerate() {
result.push(self.get_column(i, col.col_type).unwrap_or(SochValue::Null));
}
result
}
#[inline]
fn write_value(buf: &mut Vec<u8>, value: &SochValue) {
match value {
SochValue::Null => {}
SochValue::Bool(b) => buf.push(if *b { 1 } else { 0 }),
SochValue::Int(i) => buf.extend_from_slice(&i.to_le_bytes()),
SochValue::UInt(u) => buf.extend_from_slice(&u.to_le_bytes()),
SochValue::Float(f) => buf.extend_from_slice(&f.to_le_bytes()),
SochValue::Text(s) => {
buf.extend_from_slice(&(s.len() as u32).to_le_bytes());
buf.extend_from_slice(s.as_bytes());
}
SochValue::Binary(b) => {
buf.extend_from_slice(&(b.len() as u32).to_le_bytes());
buf.extend_from_slice(b);
}
_ => {} }
}
#[inline]
pub fn get_column(&self, idx: usize, col_type: PackedColumnType) -> Option<SochValue> {
if idx >= self.num_cols as usize {
return None;
}
let k = self.num_cols as usize;
let null_byte = self.data[idx / 8];
if (null_byte & (1 << (idx % 8))) != 0 {
return Some(SochValue::Null);
}
let offset_pos = self.null_bitmap_size + idx * 4;
let offset = u32::from_le_bytes([
self.data[offset_pos],
self.data[offset_pos + 1],
self.data[offset_pos + 2],
self.data[offset_pos + 3],
]) as usize;
let data_start = self.null_bitmap_size + k * 4;
let value_start = data_start + offset;
if value_start >= self.data.len() {
return Some(SochValue::Null);
}
Some(Self::read_value(&self.data[value_start..], col_type))
}
#[inline]
fn read_value(data: &[u8], col_type: PackedColumnType) -> SochValue {
match col_type {
PackedColumnType::Null => SochValue::Null,
PackedColumnType::Bool => {
if data.is_empty() {
SochValue::Null
} else {
SochValue::Bool(data[0] != 0)
}
}
PackedColumnType::Int64 => {
if data.len() < 8 {
SochValue::Null
} else {
let bytes: [u8; 8] = data[..8].try_into().unwrap();
SochValue::Int(i64::from_le_bytes(bytes))
}
}
PackedColumnType::UInt64 => {
if data.len() < 8 {
SochValue::Null
} else {
let bytes: [u8; 8] = data[..8].try_into().unwrap();
SochValue::UInt(u64::from_le_bytes(bytes))
}
}
PackedColumnType::Float64 => {
if data.len() < 8 {
SochValue::Null
} else {
let bytes: [u8; 8] = data[..8].try_into().unwrap();
SochValue::Float(f64::from_le_bytes(bytes))
}
}
PackedColumnType::Text => {
if data.len() < 4 {
SochValue::Null
} else {
let len = u32::from_le_bytes(data[..4].try_into().unwrap()) as usize;
if data.len() < 4 + len {
SochValue::Null
} else {
match std::str::from_utf8(&data[4..4 + len]) {
Ok(s) => SochValue::Text(s.to_string()),
Err(_) => SochValue::Null,
}
}
}
}
PackedColumnType::Binary => {
if data.len() < 4 {
SochValue::Null
} else {
let len = u32::from_le_bytes(data[..4].try_into().unwrap()) as usize;
if data.len() < 4 + len {
SochValue::Null
} else {
SochValue::Binary(data[4..4 + len].to_vec())
}
}
}
}
}
#[inline]
pub fn get_by_name(&self, schema: &PackedTableSchema, name: &str) -> Option<SochValue> {
let idx = schema.column_index(name)?;
let col = schema.column(idx)?;
self.get_column(idx, col.col_type)
}
#[inline]
pub fn as_bytes(&self) -> &[u8] {
&self.data
}
#[inline]
pub fn into_bytes(self) -> Vec<u8> {
self.data
}
pub fn from_bytes(data: Vec<u8>, num_cols: usize) -> Result<Self> {
let null_bitmap_size = num_cols.div_ceil(8);
let min_size = null_bitmap_size + num_cols * 4;
if data.len() < min_size {
return Err(SochDBError::Internal(format!(
"PackedRow data too short: {} < {}",
data.len(),
min_size
)));
}
Ok(Self {
data,
num_cols: num_cols as u16,
null_bitmap_size,
})
}
pub fn unpack(&self, schema: &PackedTableSchema) -> HashMap<String, SochValue> {
let mut result = HashMap::with_capacity(schema.columns.len());
for (i, col) in schema.columns.iter().enumerate() {
if let Some(value) = self.get_column(i, col.col_type)
&& (!matches!(value, SochValue::Null) || col.nullable)
{
result.insert(col.name.clone(), value);
}
}
result
}
#[inline]
pub fn num_columns(&self) -> usize {
self.num_cols as usize
}
#[inline]
pub fn size(&self) -> usize {
self.data.len()
}
}
pub struct PackedRowBuilder {
schema: PackedTableSchema,
values: HashMap<String, SochValue>,
}
impl PackedRowBuilder {
pub fn new(schema: PackedTableSchema) -> Self {
let capacity = schema.columns.len();
Self {
schema,
values: HashMap::with_capacity(capacity),
}
}
pub fn set(mut self, name: impl Into<String>, value: SochValue) -> Self {
self.values.insert(name.into(), value);
self
}
pub fn set_int(self, name: impl Into<String>, value: i64) -> Self {
self.set(name, SochValue::Int(value))
}
pub fn set_text(self, name: impl Into<String>, value: impl Into<String>) -> Self {
self.set(name, SochValue::Text(value.into()))
}
pub fn set_float(self, name: impl Into<String>, value: f64) -> Self {
self.set(name, SochValue::Float(value))
}
pub fn set_bool(self, name: impl Into<String>, value: bool) -> Self {
self.set(name, SochValue::Bool(value))
}
pub fn build(self) -> PackedRow {
PackedRow::pack(&self.schema, &self.values)
}
}
#[cfg(test)]
mod tests {
use super::*;
fn test_schema() -> PackedTableSchema {
PackedTableSchema::new(
"test",
vec![
PackedColumnDef {
name: "id".into(),
col_type: PackedColumnType::Int64,
nullable: false,
},
PackedColumnDef {
name: "name".into(),
col_type: PackedColumnType::Text,
nullable: false,
},
PackedColumnDef {
name: "score".into(),
col_type: PackedColumnType::Float64,
nullable: true,
},
PackedColumnDef {
name: "active".into(),
col_type: PackedColumnType::Bool,
nullable: true,
},
],
)
}
#[test]
fn test_pack_unpack_roundtrip() {
let schema = test_schema();
let mut values = HashMap::new();
values.insert("id".to_string(), SochValue::Int(42));
values.insert("name".to_string(), SochValue::Text("Alice".to_string()));
values.insert("score".to_string(), SochValue::Float(98.5));
values.insert("active".to_string(), SochValue::Bool(true));
let packed = PackedRow::pack(&schema, &values);
assert_eq!(
packed.get_column(0, PackedColumnType::Int64),
Some(SochValue::Int(42))
);
assert_eq!(
packed.get_column(1, PackedColumnType::Text),
Some(SochValue::Text("Alice".to_string()))
);
assert_eq!(
packed.get_column(2, PackedColumnType::Float64),
Some(SochValue::Float(98.5))
);
assert_eq!(
packed.get_column(3, PackedColumnType::Bool),
Some(SochValue::Bool(true))
);
let unpacked = packed.unpack(&schema);
assert_eq!(unpacked.get("id"), Some(&SochValue::Int(42)));
assert_eq!(
unpacked.get("name"),
Some(&SochValue::Text("Alice".to_string()))
);
}
#[test]
fn test_null_handling() {
let schema = test_schema();
let mut values = HashMap::new();
values.insert("id".to_string(), SochValue::Int(1));
values.insert("name".to_string(), SochValue::Text("Bob".to_string()));
let packed = PackedRow::pack(&schema, &values);
assert_eq!(
packed.get_column(0, PackedColumnType::Int64),
Some(SochValue::Int(1))
);
assert_eq!(
packed.get_column(2, PackedColumnType::Float64),
Some(SochValue::Null)
);
assert_eq!(
packed.get_column(3, PackedColumnType::Bool),
Some(SochValue::Null)
);
}
#[test]
fn test_bytes_roundtrip() {
let schema = test_schema();
let mut values = HashMap::new();
values.insert("id".to_string(), SochValue::Int(100));
values.insert("name".to_string(), SochValue::Text("Test".to_string()));
let packed = PackedRow::pack(&schema, &values);
let bytes = packed.as_bytes().to_vec();
let restored = PackedRow::from_bytes(bytes, schema.columns.len()).unwrap();
assert_eq!(
restored.get_column(0, PackedColumnType::Int64),
Some(SochValue::Int(100))
);
assert_eq!(
restored.get_column(1, PackedColumnType::Text),
Some(SochValue::Text("Test".to_string()))
);
}
#[test]
fn test_builder() {
let schema = test_schema();
let packed = PackedRowBuilder::new(schema.clone())
.set_int("id", 99)
.set_text("name", "Builder Test")
.set_float("score", 77.5)
.set_bool("active", false)
.build();
assert_eq!(packed.get_by_name(&schema, "id"), Some(SochValue::Int(99)));
assert_eq!(
packed.get_by_name(&schema, "name"),
Some(SochValue::Text("Builder Test".to_string()))
);
assert_eq!(
packed.get_by_name(&schema, "score"),
Some(SochValue::Float(77.5))
);
assert_eq!(
packed.get_by_name(&schema, "active"),
Some(SochValue::Bool(false))
);
}
#[test]
fn test_size_reduction() {
let schema = test_schema();
let mut values = HashMap::new();
values.insert("id".to_string(), SochValue::Int(42));
values.insert("name".to_string(), SochValue::Text("Alice".to_string()));
values.insert("score".to_string(), SochValue::Float(98.5));
values.insert("active".to_string(), SochValue::Bool(true));
let packed = PackedRow::pack(&schema, &values);
assert!(packed.size() < 50, "Packed row should be compact");
}
}