#[allow(dead_code)]
mod adaptive_encode;
mod batch_encode;
mod bitmap;
#[allow(dead_code)]
mod bloom;
#[allow(dead_code)]
mod codec_decode;
#[allow(dead_code)]
mod codec_encode;
#[allow(dead_code)]
mod columnar;
mod decoder;
mod dictionary;
#[allow(dead_code)]
mod encoder;
#[allow(dead_code)]
mod fast_decode;
#[allow(dead_code)]
mod fast_encode;
#[allow(dead_code)]
mod parallel_encode;
mod predicate_pushdown;
mod schema;
#[allow(dead_code)]
mod schema_encode;
mod serde_impl;
#[allow(dead_code)]
mod simd_decode;
mod stats;
mod stats_collector;
mod traits;
#[allow(dead_code)]
mod ultra_encode;
#[allow(dead_code)]
mod varint;
pub use decoder::{EnumAccess, MapAccess, SeqAccess, TbfDeserializer};
pub use dictionary::{BorrowedDictionary, StringDictionary};
pub use encoder::TbfSerializer;
pub use schema::{Schema, SchemaField, SchemaRegistry, SchemaType, infer_schema_from_json};
pub use traits::{TbfDecode, TbfEncode};
pub use varint::{decode_signed_varint, decode_varint, encode_signed_varint, encode_varint};
pub use schema_encode::{
ColumnSchema, FieldEncoding, TableEncode, TableSchema, TableSchemaBuilder,
};
pub use columnar::{
ColumnarDecode, ColumnarDecoder, ColumnarEncode, ColumnarEncoder, TBC_MAGIC, TBC_VERSION,
};
pub use stats::ColumnStats;
pub use stats_collector::StatisticsCollector;
pub use fast_decode::{
FastBorrowedDictionary, FastDecode, fast_decode_signed_varint, fast_decode_varint,
};
pub use fast_encode::{
FastBuffer, FastEncode, FastStringDictionary, fast_encode_signed_varint, fast_encode_slice,
fast_encode_varint,
};
pub use predicate_pushdown::{Predicate, QueryFilter};
pub use bloom::BloomFilter;
pub use bitmap::NullBitmap;
pub use adaptive_encode::{CodecAnalysis, CodecAnalyzer, CompressionCodec};
pub use batch_encode::{BatchEncoder, BatchEncodingStats};
pub use ultra_encode::{
ColumnCollectors, DirectStringEncoder, DirectU32Encoder, ULTRA_MAGIC, ULTRA_VERSION,
UltraBuffer, UltraEncode, UltraEncodeDirect, encode_varint_to_ultra,
};
pub use ultra_encode::ColumnType as UltraColumnType;
pub use columnar::{ColumnReader, ColumnType};
pub use codec_decode::CodecDecodingContext;
pub use codec_encode::{CodecEncodingContext, CodecMetadata};
pub use schema_encode::{
AdaptiveIntEncoder, AdaptiveStringEncoder, SCHEMA_MAGIC, encode_varint_fast,
};
#[cfg(feature = "performance")]
pub use parallel_encode::{ParallelBatchEncoder, ParallelEncodingStats};
use crate::error::TauqError;
/// Leading magic bytes of a TBF payload: ASCII "TBF" (0x54 0x42 0x46) followed by 0x01.
pub const TBF_MAGIC: [u8; 4] = [0x54, 0x42, 0x46, 0x01];
/// Format version byte written into the TBF header.
pub const TBF_VERSION: u8 = 1;
// Header flag bits. NOTE(review): bit 0x01 is not defined here — presumably
// reserved; confirm against the encoder/decoder modules.
/// Flag bit indicating a string dictionary section is present (per name — verify in encoder).
pub const FLAG_DICTIONARY: u8 = 0x02;
/// Flag bit indicating codec metadata is present (per name — verify in codec_encode).
pub const FLAG_CODEC_METADATA: u8 = 0x04;
/// Wire-format type tag: a single byte identifying which payload encoding
/// follows in a TBF stream. Discriminants are fixed and contiguous (0..=23);
/// they are part of the serialized format and must never be renumbered.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum TypeTag {
    Null = 0,
    Bool = 1,
    Int = 2,
    Float = 3,
    String = 4,
    Bytes = 5,
    Seq = 6,
    Map = 7,
    Unit = 8,
    None = 9,
    Some = 10,
    I8 = 11,
    I16 = 12,
    I32 = 13,
    I64 = 14,
    I128 = 15,
    U8 = 16,
    U16 = 17,
    U32 = 18,
    U64 = 19,
    U128 = 20,
    F32 = 21,
    F64 = 22,
    Char = 23,
}

impl TypeTag {
    /// Decodes a raw tag byte back into a [`TypeTag`].
    ///
    /// Returns `None` for any byte outside the valid range `0..=23`.
    pub fn from_u8(v: u8) -> Option<Self> {
        // Discriminants are contiguous starting at 0, so indexing a lookup
        // table by the byte value is equivalent to an exhaustive match;
        // `get` handles the out-of-range case.
        const TAGS: [TypeTag; 24] = [
            TypeTag::Null,
            TypeTag::Bool,
            TypeTag::Int,
            TypeTag::Float,
            TypeTag::String,
            TypeTag::Bytes,
            TypeTag::Seq,
            TypeTag::Map,
            TypeTag::Unit,
            TypeTag::None,
            TypeTag::Some,
            TypeTag::I8,
            TypeTag::I16,
            TypeTag::I32,
            TypeTag::I64,
            TypeTag::I128,
            TypeTag::U8,
            TypeTag::U16,
            TypeTag::U32,
            TypeTag::U64,
            TypeTag::U128,
            TypeTag::F32,
            TypeTag::F64,
            TypeTag::Char,
        ];
        TAGS.get(usize::from(v)).copied()
    }
}
/// Serializes any `serde`-serializable value into TBF bytes using a
/// default-capacity buffer.
///
/// # Errors
/// Propagates any [`TauqError`] produced by the serializer.
pub fn to_bytes<T: serde::Serialize>(value: &T) -> Result<Vec<u8>, TauqError> {
    let mut ser = TbfSerializer::new();
    value.serialize(&mut ser)?;
    Ok(ser.into_bytes())
}
/// Serializes a value into TBF bytes, pre-allocating `capacity` bytes in the
/// output buffer to avoid reallocation when the final size is known/boundable.
///
/// # Errors
/// Propagates any [`TauqError`] produced by the serializer.
pub fn to_bytes_with_capacity<T: serde::Serialize>(
    value: &T,
    capacity: usize,
) -> Result<Vec<u8>, TauqError> {
    let mut ser = TbfSerializer::with_capacity(capacity);
    value.serialize(&mut ser)?;
    Ok(ser.into_bytes())
}
/// Deserializes a value of type `T` from TBF bytes. Zero-copy types may
/// borrow directly from `bytes` (lifetime `'de`).
///
/// # Errors
/// Returns a [`TauqError`] if the header is invalid or decoding fails.
pub fn from_bytes<'de, T: serde::Deserialize<'de>>(bytes: &'de [u8]) -> Result<T, TauqError> {
    let mut de = TbfDeserializer::new(bytes)?;
    T::deserialize(&mut de)
}
/// Compiles Tauq source text and encodes the resulting JSON value as TBF bytes.
///
/// # Errors
/// Returns a [`TauqError`] from either compilation or encoding.
pub fn encode(source: &str) -> Result<Vec<u8>, TauqError> {
    encode_json(&crate::compile_tauq(source)?)
}
/// Encodes an arbitrary `serde_json::Value` as TBF bytes.
///
/// # Errors
/// Propagates any [`TauqError`] from serialization.
pub fn encode_json(json: &serde_json::Value) -> Result<Vec<u8>, TauqError> {
    to_bytes::<serde_json::Value>(json)
}
/// Decodes TBF bytes back into a `serde_json::Value`.
///
/// # Errors
/// Returns a [`TauqError`] if `data` is not a valid TBF payload.
pub fn decode(data: &[u8]) -> Result<serde_json::Value, TauqError> {
    from_bytes::<serde_json::Value>(data)
}
/// Decodes TBF bytes and renders the result as Tauq source text.
///
/// # Errors
/// Returns a [`TauqError`] if `data` is not a valid TBF payload.
pub fn decode_to_tauq(data: &[u8]) -> Result<String, TauqError> {
    let value: serde_json::Value = from_bytes(data)?;
    Ok(crate::format_to_tauq(&value))
}
#[cfg(test)]
mod tests {
    //! Round-trip and size tests for the TBF serde front-end
    //! (`to_bytes` / `from_bytes` / `encode_json` / `decode`).
    use super::*;
    use serde::{Deserialize, Serialize};
    // Small flat struct exercising ints, strings, and bools.
    #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
    struct TestUser {
        id: u32,
        name: String,
        age: u32,
        active: bool,
    }
    // Wider struct with highly repetitive string fields — used by the size
    // test, where repeated values should compress well versus JSON.
    #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
    struct Employee {
        id: u32,
        name: String,
        age: u32,
        city: String,
        department: String,
        salary: u32,
    }
    // A single struct survives a to_bytes -> from_bytes round trip intact.
    #[test]
    fn test_direct_serde_roundtrip() {
        let user = TestUser {
            id: 1,
            name: "Alice".into(),
            age: 30,
            active: true,
        };
        let bytes = to_bytes(&user).unwrap();
        let decoded: TestUser = from_bytes(&bytes).unwrap();
        assert_eq!(user, decoded);
    }
    // A Vec of structs round-trips with order and contents preserved.
    #[test]
    fn test_vec_roundtrip() {
        let users = vec![
            TestUser {
                id: 1,
                name: "Alice".into(),
                age: 30,
                active: true,
            },
            TestUser {
                id: 2,
                name: "Bob".into(),
                age: 25,
                active: false,
            },
            TestUser {
                id: 3,
                name: "Carol".into(),
                age: 35,
                active: true,
            },
        ];
        let bytes = to_bytes(&users).unwrap();
        let decoded: Vec<TestUser> = from_bytes(&bytes).unwrap();
        assert_eq!(users, decoded);
    }
    // Each primitive tag path (signed, unsigned, float, bool, string)
    // round-trips exactly.
    #[test]
    fn test_primitives() {
        let v: i32 = -42;
        assert_eq!(v, from_bytes::<i32>(&to_bytes(&v).unwrap()).unwrap());
        let v: u64 = 12345678901234;
        assert_eq!(v, from_bytes::<u64>(&to_bytes(&v).unwrap()).unwrap());
        let v: f64 = 1.234567890123;
        assert_eq!(v, from_bytes::<f64>(&to_bytes(&v).unwrap()).unwrap());
        let v: bool = true;
        assert_eq!(v, from_bytes::<bool>(&to_bytes(&v).unwrap()).unwrap());
        let v: String = "Hello, World!".into();
        assert_eq!(v, from_bytes::<String>(&to_bytes(&v).unwrap()).unwrap());
    }
    // Option encoding distinguishes Some(_) from None after a round trip.
    #[test]
    fn test_option() {
        let some: Option<i32> = Some(42);
        let none: Option<i32> = None;
        assert_eq!(some, from_bytes(&to_bytes(&some).unwrap()).unwrap());
        assert_eq!(
            none,
            from_bytes::<Option<i32>>(&to_bytes(&none).unwrap()).unwrap()
        );
    }
    // Nested structs (struct-in-struct plus a byte vector) round-trip.
    #[test]
    fn test_nested_struct() {
        #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
        struct Outer {
            name: String,
            inner: Inner,
        }
        #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
        struct Inner {
            value: i32,
            data: Vec<u8>,
        }
        let outer = Outer {
            name: "test".into(),
            inner: Inner {
                value: 42,
                data: vec![1, 2, 3, 4, 5],
            },
        };
        let bytes = to_bytes(&outer).unwrap();
        let decoded: Outer = from_bytes(&bytes).unwrap();
        assert_eq!(outer, decoded);
    }
    // A dynamic serde_json::Value (objects, arrays, mixed scalars)
    // round-trips through encode_json/decode unchanged.
    #[test]
    fn test_json_value_roundtrip() {
        let json = serde_json::json!({
            "users": [
                {"id": 1, "name": "Alice", "age": 30},
                {"id": 2, "name": "Bob", "age": 25},
            ],
            "count": 2
        });
        let bytes = encode_json(&json).unwrap();
        let decoded = decode(&bytes).unwrap();
        assert_eq!(json, decoded);
    }
    // TBF output for repetitive tabular data must be strictly smaller than
    // the equivalent JSON text (the format's raison d'être).
    #[test]
    fn test_size_comparison() {
        let employees: Vec<Employee> = (0..100)
            .map(|i| Employee {
                id: i,
                name: format!("Employee{}", i),
                age: 25 + (i % 40),
                city: ["NYC", "LA", "Chicago", "Houston", "Phoenix"][i as usize % 5].into(),
                department: ["Engineering", "Sales", "Marketing", "HR", "Finance"][i as usize % 5]
                    .into(),
                salary: 50000 + (i * 1000),
            })
            .collect();
        let json_str = serde_json::to_string(&employees).unwrap();
        let tbf_bytes = to_bytes(&employees).unwrap();
        println!("JSON size: {} bytes", json_str.len());
        println!("TBF size: {} bytes", tbf_bytes.len());
        println!(
            "Compression ratio: {:.1}%",
            (tbf_bytes.len() as f64 / json_str.len() as f64) * 100.0
        );
        assert!(tbf_bytes.len() < json_str.len());
    }
}