use crate::encoders::algorithms::schema::types::{
FLAG_HAS_NULLS, FLAG_HAS_ROOT_KEY, FieldType, IntermediateRepresentation, SchemaValue,
};
/// Serializes an [`IntermediateRepresentation`] into its packed byte form.
///
/// The output is the header section (written by `pack_header`) immediately
/// followed by every value in `ir.values` (written by `pack_values`).
pub fn pack(ir: &IntermediateRepresentation) -> Vec<u8> {
    let mut out: Vec<u8> = Vec::new();

    // Order matters: the header must precede the value stream.
    pack_header(&mut out, ir);
    pack_values(&mut out, ir);

    out
}
/// Appends the header section of `ir` to `buffer`.
///
/// Bytes are written in this order:
/// 1. the raw flags byte;
/// 2. the root key as a varint length plus its bytes, only when
///    `FLAG_HAS_ROOT_KEY` is set *and* a root key is present;
/// 3. the row count as a varint;
/// 4. the field count as a varint;
/// 5. the packed field type tags (see `pack_field_types`);
/// 6. each field name as a varint length plus its bytes;
/// 7. the null bitmap bytes, only when `FLAG_HAS_NULLS` is set *and* a
///    bitmap is present.
fn pack_header(buffer: &mut Vec<u8>, ir: &IntermediateRepresentation) {
    // Writes `bytes` preceded by its length encoded as an unsigned varint.
    fn put_len_prefixed(out: &mut Vec<u8>, bytes: &[u8]) {
        encode_varint(out, bytes.len() as u64);
        out.extend_from_slice(bytes);
    }

    let header = &ir.header;
    buffer.push(header.flags);

    if header.has_flag(FLAG_HAS_ROOT_KEY)
        && let Some(root_key) = header.root_key.as_ref()
    {
        put_len_prefixed(buffer, root_key.as_bytes());
    }

    encode_varint(buffer, header.row_count as u64);
    encode_varint(buffer, header.fields.len() as u64);
    pack_field_types(buffer, ir);

    for field_def in &header.fields {
        put_len_prefixed(buffer, field_def.name.as_bytes());
    }

    // The bitmap is copied verbatim; no length prefix is written for it here.
    if header.has_flag(FLAG_HAS_NULLS)
        && let Some(null_bits) = header.null_bitmap.as_ref()
    {
        buffer.extend_from_slice(null_bits);
    }
}
/// Appends the nibble-packed type tags of every header field to `buffer`.
///
/// The tags are first collected into a scratch vector so that its byte length
/// can be written as a varint prefix ahead of the tag bytes themselves.
fn pack_field_types(buffer: &mut Vec<u8>, ir: &IntermediateRepresentation) {
    let mut packed_tags: Vec<u8> = Vec::new();
    // Shared across all fields, so consecutive fields share bytes two-per-byte.
    let mut nibbles_written = 0;

    ir.header.fields.iter().for_each(|field_def| {
        pack_field_type_recursive(&mut packed_tags, &field_def.field_type, &mut nibbles_written);
    });

    encode_varint(buffer, packed_tags.len() as u64);
    buffer.extend_from_slice(&packed_tags);
}
/// Writes the type tag of `field_type` as one nibble, then recurses into the
/// element type when `field_type` is an array.
///
/// `nibble_count` is the number of nibbles already written into `buffer`.
/// An even count starts a new byte with the tag in the low nibble; an odd
/// count ORs the tag into the high nibble of the last byte.
///
/// NOTE(review): this assumes `type_tag()` fits in 4 bits — a larger tag
/// would spill into the neighbouring nibble. Confirm against `FieldType`.
fn pack_field_type_recursive(
    buffer: &mut Vec<u8>,
    field_type: &FieldType,
    nibble_count: &mut usize,
) {
    let nibble = field_type.type_tag();
    let starts_new_byte = (*nibble_count).is_multiple_of(2);

    if starts_new_byte {
        buffer.push(nibble);
    } else {
        // An odd count means the previous call left a half-filled byte.
        let tail = buffer.len() - 1;
        buffer[tail] |= nibble << 4;
    }
    *nibble_count += 1;

    // Arrays are followed immediately by the tag of their element type.
    if let FieldType::Array(inner) = field_type {
        pack_field_type_recursive(buffer, inner, nibble_count);
    }
}
/// Appends every value of `ir.values` to `buffer`, in order, using
/// `pack_value` for each one.
fn pack_values(buffer: &mut Vec<u8>, ir: &IntermediateRepresentation) {
    ir.values
        .iter()
        .for_each(|entry| pack_value(buffer, entry));
}
/// Appends the binary encoding of a single `value` to `buffer`.
///
/// Encodings by variant:
/// - `Null`: nothing is written;
/// - `Bool`: one byte, `1` for true and `0` for false;
/// - `U64`: unsigned varint;
/// - `I64`: zigzag-encoded varint;
/// - `F64`: the little-endian bytes of the float;
/// - `String`: varint byte length followed by the string bytes;
/// - `Array`: varint element count, a null bitmap of `ceil(len / 8)` bytes
///   (bit `i % 8` of byte `i / 8` is set when element `i` is `Null`), then
///   the encoding of every non-null element in order.
fn pack_value(buffer: &mut Vec<u8>, value: &SchemaValue) {
    match value {
        SchemaValue::Null => {}
        SchemaValue::Bool(flag) => buffer.push(u8::from(*flag)),
        SchemaValue::U64(number) => encode_varint(buffer, *number),
        SchemaValue::I64(number) => encode_signed_varint(buffer, *number),
        SchemaValue::F64(number) => buffer.extend_from_slice(&number.to_le_bytes()),
        SchemaValue::String(text) => {
            encode_varint(buffer, text.len() as u64);
            buffer.extend_from_slice(text.as_bytes());
        }
        SchemaValue::Array(items) => {
            encode_varint(buffer, items.len() as u64);

            // First pass: mark the position of every null element.
            let mut null_bits = vec![0u8; items.len().div_ceil(8)];
            let null_positions = items
                .iter()
                .enumerate()
                .filter(|(_, item)| matches!(item, SchemaValue::Null));
            for (pos, _) in null_positions {
                null_bits[pos / 8] |= 1 << (pos % 8);
            }
            buffer.extend_from_slice(&null_bits);

            // Second pass: nulls occupy no space in the element stream.
            items
                .iter()
                .filter(|item| !matches!(item, SchemaValue::Null))
                .for_each(|item| pack_value(buffer, item));
        }
    }
}
/// Appends `value` to `buffer` as a base-128 varint.
///
/// The value is emitted seven bits at a time, least-significant group first.
/// Every byte except the last has its high bit (`0x80`) set to signal that
/// more bytes follow. Zero is encoded as the single byte `0`.
pub(crate) fn encode_varint(buffer: &mut Vec<u8>, mut value: u64) {
    // While more than seven significant bits remain, emit a continuation byte.
    while value >= 0x80 {
        buffer.push((value as u8 & 0x7F) | 0x80);
        value >>= 7;
    }
    // The remaining bits fit in seven bits, so the high bit stays clear.
    buffer.push(value as u8);
}
/// Appends `value` to `buffer` as a zigzag-encoded varint.
///
/// Zigzag maps signed integers to unsigned ones so that values of small
/// magnitude stay short: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, and so on. The
/// mapped value is then written with `encode_varint`.
pub(crate) fn encode_signed_varint(buffer: &mut Vec<u8>, value: i64) {
    // Arithmetic shift yields all ones for negative input, all zeros otherwise.
    let sign_mask = (value >> 63) as u64;
    let zigzag = ((value as u64) << 1) ^ sign_mask;
    encode_varint(buffer, zigzag);
}
#[cfg(test)]
mod tests {
    //! Unit tests for the binary packer: varint primitives, single-value
    //! encoding, and whole-IR packing.

    use super::*;
    use crate::encoders::algorithms::schema::types::{FieldDef, SchemaHeader};

    /// Varints switch from one to two to three bytes at 128 and 16384.
    #[test]
    fn test_encode_varint() {
        let mut buf = Vec::new();
        encode_varint(&mut buf, 0);
        assert_eq!(buf, vec![0]);
        buf.clear();
        encode_varint(&mut buf, 1);
        assert_eq!(buf, vec![1]);
        buf.clear();
        encode_varint(&mut buf, 127);
        assert_eq!(buf, vec![127]);
        buf.clear();
        encode_varint(&mut buf, 128);
        assert_eq!(buf, vec![0x80, 0x01]);
        buf.clear();
        encode_varint(&mut buf, 16383);
        assert_eq!(buf, vec![0xFF, 0x7F]);
        buf.clear();
        encode_varint(&mut buf, 16384);
        assert_eq!(buf, vec![0x80, 0x80, 0x01]);
    }

    /// The largest `u64` needs ten bytes: nine full groups plus one final bit.
    #[test]
    fn test_encode_varint_u64_max() {
        let mut buf = Vec::new();
        encode_varint(&mut buf, u64::MAX);
        let mut expected = vec![0xFF; 9];
        expected.push(0x01);
        assert_eq!(buf, expected);
    }

    /// Zigzag interleaves negative and positive values: 0, -1, 1, -2, ...
    #[test]
    fn test_encode_signed_varint() {
        let mut buf = Vec::new();
        encode_signed_varint(&mut buf, 0);
        assert_eq!(buf, vec![0]);
        buf.clear();
        encode_signed_varint(&mut buf, -1);
        assert_eq!(buf, vec![1]);
        buf.clear();
        encode_signed_varint(&mut buf, 1);
        assert_eq!(buf, vec![2]);
        buf.clear();
        encode_signed_varint(&mut buf, -64);
        assert_eq!(buf, vec![127]);
        buf.clear();
        encode_signed_varint(&mut buf, 64);
        assert_eq!(buf, vec![128, 1]);
    }

    /// `i64::MIN` maps to `u64::MAX` and `i64::MAX` to `u64::MAX - 1`; neither
    /// may panic on the internal left shift.
    #[test]
    fn test_encode_signed_varint_extremes() {
        let mut buf = Vec::new();
        encode_signed_varint(&mut buf, i64::MIN);
        let mut expected_min = vec![0xFF; 9];
        expected_min.push(0x01);
        assert_eq!(buf, expected_min);

        buf.clear();
        encode_signed_varint(&mut buf, i64::MAX);
        let mut expected_max = vec![0xFE];
        expected_max.extend_from_slice(&[0xFF; 8]);
        expected_max.push(0x01);
        assert_eq!(buf, expected_max);
    }

    #[test]
    fn test_pack_simple_ir() {
        let fields = vec![
            FieldDef::new("id", FieldType::U64),
            FieldDef::new("name", FieldType::String),
        ];
        let header = SchemaHeader::new(1, fields);
        let values = vec![
            SchemaValue::U64(42),
            SchemaValue::String("Alice".to_string()),
        ];
        let ir = IntermediateRepresentation::new(header, values).unwrap();
        let packed = pack(&ir);
        assert!(!packed.is_empty());
        // No flags were set, so the leading flags byte is zero.
        assert_eq!(packed[0], 0);
    }

    #[test]
    fn test_pack_with_root_key() {
        let mut header = SchemaHeader::new(1, vec![FieldDef::new("id", FieldType::U64)]);
        header.root_key = Some("users".to_string());
        header.set_flag(FLAG_HAS_ROOT_KEY);
        let values = vec![SchemaValue::U64(42)];
        let ir = IntermediateRepresentation::new(header, values).unwrap();
        let packed = pack(&ir);
        assert_eq!(packed[0] & FLAG_HAS_ROOT_KEY, FLAG_HAS_ROOT_KEY);
        // The root key follows the flags byte as a length-prefixed string.
        assert_eq!(packed[1], 5);
        assert_eq!(&packed[2..7], b"users");
    }

    #[test]
    fn test_pack_field_types() {
        let fields = vec![
            FieldDef::new("a", FieldType::U64),
            FieldDef::new("b", FieldType::I64),
            FieldDef::new("c", FieldType::String),
        ];
        let header = SchemaHeader::new(1, fields);
        let values = vec![
            SchemaValue::U64(1),
            SchemaValue::I64(-1),
            SchemaValue::String("x".to_string()),
        ];
        let ir = IntermediateRepresentation::new(header, values).unwrap();
        let packed = pack(&ir);
        assert!(!packed.is_empty());
    }

    #[test]
    fn test_pack_values() {
        let mut buffer = Vec::new();
        pack_value(&mut buffer, &SchemaValue::U64(42));
        assert_eq!(buffer, vec![42]);
        buffer.clear();
        pack_value(&mut buffer, &SchemaValue::Bool(true));
        assert_eq!(buffer, vec![1]);
        buffer.clear();
        pack_value(&mut buffer, &SchemaValue::String("hi".to_string()));
        assert_eq!(buffer, vec![2, b'h', b'i']);
    }

    /// Covers the scalar variants not exercised by `test_pack_values`.
    #[test]
    fn test_pack_scalar_edge_values() {
        let mut buffer = Vec::new();

        // A bare null occupies no bytes at all.
        pack_value(&mut buffer, &SchemaValue::Null);
        assert!(buffer.is_empty());

        pack_value(&mut buffer, &SchemaValue::Bool(false));
        assert_eq!(buffer, vec![0]);

        buffer.clear();
        pack_value(&mut buffer, &SchemaValue::I64(-1));
        assert_eq!(buffer, vec![1]);

        buffer.clear();
        pack_value(&mut buffer, &SchemaValue::F64(1.5));
        assert_eq!(buffer, 1.5f64.to_le_bytes().to_vec());

        buffer.clear();
        pack_value(&mut buffer, &SchemaValue::String(String::new()));
        assert_eq!(buffer, vec![0]);
    }

    #[test]
    fn test_pack_array() {
        let mut buffer = Vec::new();
        let array = SchemaValue::Array(vec![SchemaValue::U64(1), SchemaValue::U64(2)]);
        pack_value(&mut buffer, &array);
        // length, one bitmap byte (no nulls), then the two elements
        assert_eq!(buffer, vec![2, 0, 1, 2]);
    }

    #[test]
    fn test_pack_array_with_nulls() {
        let mut buffer = Vec::new();
        let array = SchemaValue::Array(vec![
            SchemaValue::U64(1),
            SchemaValue::Null,
            SchemaValue::U64(3),
        ]);
        pack_value(&mut buffer, &array);
        // Bit 1 of the bitmap marks the null; the null itself is not emitted.
        assert_eq!(buffer, vec![3, 0b00000010, 1, 3]);
    }

    /// An empty array is just its zero length: the bitmap has zero bytes.
    #[test]
    fn test_pack_empty_array() {
        let mut buffer = Vec::new();
        pack_value(&mut buffer, &SchemaValue::Array(Vec::new()));
        assert_eq!(buffer, vec![0]);
    }

    /// Nine elements need a two-byte bitmap; a null at index 8 sets bit 0 of
    /// the second byte.
    #[test]
    fn test_pack_array_null_bitmap_spans_bytes() {
        let mut items: Vec<SchemaValue> = (0..8).map(SchemaValue::U64).collect();
        items.push(SchemaValue::Null);

        let mut buffer = Vec::new();
        pack_value(&mut buffer, &SchemaValue::Array(items));
        assert_eq!(
            buffer,
            vec![9, 0b0000_0000, 0b0000_0001, 0, 1, 2, 3, 4, 5, 6, 7]
        );
    }

    /// Inner arrays are encoded recursively in the element stream.
    #[test]
    fn test_pack_nested_array() {
        let nested = SchemaValue::Array(vec![
            SchemaValue::Array(vec![SchemaValue::U64(7)]),
            SchemaValue::Null,
        ]);

        let mut buffer = Vec::new();
        pack_value(&mut buffer, &nested);
        // outer: len 2, bitmap 0b10; inner: len 1, bitmap 0, element 7
        assert_eq!(buffer, vec![2, 0b00000010, 1, 0, 7]);
    }
}