1use std::collections::HashMap;
54use sochdb_core::{Result, SochDBError, SochValue};
55
/// Type tag selecting how a column's bytes are decoded from a packed row.
///
/// Every variant maps to a fixed one-byte code; see
/// [`PackedColumnType::to_byte`] and [`PackedColumnType::from_byte`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackedColumnType {
    /// Boolean column (code `1`).
    Bool,
    /// Signed 64-bit integer column (code `2`).
    Int64,
    /// Unsigned 64-bit integer column (code `3`).
    UInt64,
    /// 64-bit floating point column (code `4`).
    Float64,
    /// UTF-8 text column (code `5`).
    Text,
    /// Raw byte-string column (code `6`).
    Binary,
    /// Column that always decodes to NULL (code `0`).
    Null,
}

impl PackedColumnType {
    /// Variants ordered by byte code: the index of an entry is its code.
    const BY_CODE: [Self; 7] = [
        Self::Null,
        Self::Bool,
        Self::Int64,
        Self::UInt64,
        Self::Float64,
        Self::Text,
        Self::Binary,
    ];

    /// Decodes a byte code into a column type.
    ///
    /// Returns `None` for any code outside `0..=6`.
    #[inline]
    pub fn from_byte(b: u8) -> Option<Self> {
        Self::BY_CODE.get(usize::from(b)).copied()
    }

    /// Returns the byte code of this column type (the inverse of
    /// [`PackedColumnType::from_byte`]).
    #[inline]
    pub fn to_byte(self) -> u8 {
        match self {
            Self::Null => 0,
            Self::Bool => 1,
            Self::Int64 => 2,
            Self::UInt64 => 3,
            Self::Float64 => 4,
            Self::Text => 5,
            Self::Binary => 6,
        }
    }
}
98
99#[derive(Debug, Clone)]
101pub struct PackedColumnDef {
102 pub name: String,
103 pub col_type: PackedColumnType,
104 pub nullable: bool,
105}
106
107#[derive(Debug, Clone)]
109pub struct PackedTableSchema {
110 pub name: String,
111 pub columns: Vec<PackedColumnDef>,
112}
113
114impl PackedTableSchema {
115 pub fn new(name: impl Into<String>, columns: Vec<PackedColumnDef>) -> Self {
117 Self {
118 name: name.into(),
119 columns,
120 }
121 }
122
123 #[inline]
125 pub fn column_index(&self, name: &str) -> Option<usize> {
126 self.columns.iter().position(|c| c.name == name)
127 }
128
129 #[inline]
131 pub fn column(&self, idx: usize) -> Option<&PackedColumnDef> {
132 self.columns.get(idx)
133 }
134
135 #[inline]
137 pub fn num_columns(&self) -> usize {
138 self.columns.len()
139 }
140}
141
/// A single row serialized into one contiguous byte buffer.
///
/// Buffer layout, in order:
/// 1. NULL bitmap: `ceil(num_cols / 8)` bytes; bit `i % 8` of byte `i / 8`
///    is set when column `i` is NULL.
/// 2. Offset table: one little-endian `u32` per column, giving the start of
///    that column's value relative to the beginning of the data section.
/// 3. Data section: the encoded column values, back to back.
#[repr(C)]
pub struct PackedRow {
    /// The complete serialized row (bitmap, offset table, data section).
    data: Vec<u8>,
    /// Number of columns encoded in `data`.
    num_cols: u16,
    /// Size in bytes of the NULL bitmap at the start of `data`.
    null_bitmap_size: usize,
}
159
160impl PackedRow {
161 #[inline]
163 fn buffer_size(schema: &PackedTableSchema, values: &HashMap<String, SochValue>) -> usize {
164 let k = schema.columns.len();
165 let null_bitmap_size = k.div_ceil(8);
166 let offsets_size = k * 4;
167 let data_size: usize = schema
168 .columns
169 .iter()
170 .map(|col| Self::value_size(values.get(&col.name)))
171 .sum();
172 null_bitmap_size + offsets_size + data_size
173 }
174
175 #[inline]
177 fn value_size(value: Option<&SochValue>) -> usize {
178 match value {
179 None | Some(SochValue::Null) => 0,
180 Some(SochValue::Bool(_)) => 1,
181 Some(SochValue::Int(_) | SochValue::UInt(_) | SochValue::Float(_)) => 8,
182 Some(SochValue::Text(s)) => 4 + s.len(),
183 Some(SochValue::Binary(b)) => 4 + b.len(),
184 _ => 0, }
186 }
187
188 pub fn pack(schema: &PackedTableSchema, values: &HashMap<String, SochValue>) -> Self {
197 let k = schema.columns.len();
198 let null_bitmap_size = k.div_ceil(8);
199
200 let total_size = Self::buffer_size(schema, values);
202 let mut data = Vec::with_capacity(total_size);
203
204 let mut null_bits = vec![0u8; null_bitmap_size];
206 for (i, col) in schema.columns.iter().enumerate() {
207 match values.get(&col.name) {
208 None | Some(SochValue::Null) => {
209 null_bits[i / 8] |= 1 << (i % 8);
210 }
211 _ => {}
212 }
213 }
214 data.extend_from_slice(&null_bits);
215
216 let offsets_start = data.len();
218 data.resize(offsets_start + k * 4, 0);
219
220 let data_start = offsets_start + k * 4;
222
223 for (i, col) in schema.columns.iter().enumerate() {
224 let offset = (data.len() - data_start) as u32;
226 let offset_pos = offsets_start + i * 4;
227 data[offset_pos..offset_pos + 4].copy_from_slice(&offset.to_le_bytes());
228
229 if let Some(value) = values.get(&col.name) {
231 Self::write_value(&mut data, value);
232 }
233 }
234
235 Self {
236 data,
237 num_cols: k as u16,
238 null_bitmap_size,
239 }
240 }
241
242 #[inline]
253 pub fn pack_slice(schema: &PackedTableSchema, values: &[Option<&SochValue>]) -> Self {
254 let k = schema.columns.len();
255 debug_assert_eq!(
256 values.len(),
257 k,
258 "values slice must match schema column count"
259 );
260
261 let null_bitmap_size = k.div_ceil(8);
262 let total_size = Self::buffer_size_slice(schema, values);
263
264 if total_size <= 512 {
266 Self::pack_slice_small(schema, values, k, null_bitmap_size, total_size)
267 } else {
268 Self::pack_slice_large(schema, values, k, null_bitmap_size, total_size)
269 }
270 }
271
272 #[inline]
274 fn pack_slice_small(
275 _schema: &PackedTableSchema,
276 values: &[Option<&SochValue>],
277 k: usize,
278 null_bitmap_size: usize,
279 total_size: usize,
280 ) -> Self {
281 let mut stack_buf = [0u8; 512];
283 let buf = &mut stack_buf[..total_size];
284
285 for (i, val) in values.iter().enumerate() {
287 match val {
288 None | Some(SochValue::Null) => {
289 buf[i / 8] |= 1 << (i % 8);
290 }
291 _ => {}
292 }
293 }
294
295 let offsets_start = null_bitmap_size;
297 let data_start = offsets_start + k * 4;
298 let mut data_pos = data_start;
299
300 for (i, val) in values.iter().enumerate() {
301 let offset = (data_pos - data_start) as u32;
302 let offset_pos = offsets_start + i * 4;
303 buf[offset_pos..offset_pos + 4].copy_from_slice(&offset.to_le_bytes());
304
305 if let Some(value) = val {
306 data_pos += Self::write_value_to_slice(&mut buf[data_pos..], value);
307 }
308 }
309
310 Self {
311 data: buf[..total_size].to_vec(),
312 num_cols: k as u16,
313 null_bitmap_size,
314 }
315 }
316
317 #[inline]
319 fn pack_slice_large(
320 _schema: &PackedTableSchema,
321 values: &[Option<&SochValue>],
322 k: usize,
323 null_bitmap_size: usize,
324 total_size: usize,
325 ) -> Self {
326 let mut data = Vec::with_capacity(total_size);
328
329 let mut null_bits = vec![0u8; null_bitmap_size];
331 for (i, val) in values.iter().enumerate() {
332 match val {
333 None | Some(SochValue::Null) => {
334 null_bits[i / 8] |= 1 << (i % 8);
335 }
336 _ => {}
337 }
338 }
339 data.extend_from_slice(&null_bits);
340
341 let offsets_start = data.len();
343 data.resize(offsets_start + k * 4, 0);
344
345 let data_start = offsets_start + k * 4;
347
348 for (i, val) in values.iter().enumerate() {
349 let offset = (data.len() - data_start) as u32;
350 let offset_pos = offsets_start + i * 4;
351 data[offset_pos..offset_pos + 4].copy_from_slice(&offset.to_le_bytes());
352
353 if let Some(value) = val {
354 Self::write_value(&mut data, value);
355 }
356 }
357
358 Self {
359 data,
360 num_cols: k as u16,
361 null_bitmap_size,
362 }
363 }
364
365 #[inline]
367 fn write_value_to_slice(buf: &mut [u8], value: &SochValue) -> usize {
368 match value {
369 SochValue::Null => 0,
370 SochValue::Bool(b) => {
371 buf[0] = if *b { 1 } else { 0 };
372 1
373 }
374 SochValue::Int(i) => {
375 buf[..8].copy_from_slice(&i.to_le_bytes());
376 8
377 }
378 SochValue::UInt(u) => {
379 buf[..8].copy_from_slice(&u.to_le_bytes());
380 8
381 }
382 SochValue::Float(f) => {
383 buf[..8].copy_from_slice(&f.to_bits().to_le_bytes());
384 8
385 }
386 SochValue::Text(s) => {
387 let len = s.len() as u32;
388 buf[..4].copy_from_slice(&len.to_le_bytes());
389 buf[4..4 + s.len()].copy_from_slice(s.as_bytes());
390 4 + s.len()
391 }
392 SochValue::Binary(b) => {
393 let len = b.len() as u32;
394 buf[..4].copy_from_slice(&len.to_le_bytes());
395 buf[4..4 + b.len()].copy_from_slice(b);
396 4 + b.len()
397 }
398 _ => 0,
399 }
400 }
401
402 #[inline]
404 fn buffer_size_slice(schema: &PackedTableSchema, values: &[Option<&SochValue>]) -> usize {
405 let k = schema.columns.len();
406 let null_bitmap_size = k.div_ceil(8);
407 let offsets_size = k * 4;
408
409 let data_size: usize = values
410 .iter()
411 .map(|v| match v {
412 None | Some(SochValue::Null) => 0,
413 Some(SochValue::Bool(_)) => 1,
414 Some(SochValue::Int(_) | SochValue::UInt(_) | SochValue::Float(_)) => 8,
415 Some(SochValue::Text(s)) => 4 + s.len(),
416 Some(SochValue::Binary(b)) => 4 + b.len(),
417 _ => 0,
418 })
419 .sum();
420
421 null_bitmap_size + offsets_size + data_size
422 }
423
424 #[inline]
429 pub fn unpack_to_vec(&self, schema: &PackedTableSchema) -> Vec<SochValue> {
430 let k = schema.columns.len();
431 let mut result = Vec::with_capacity(k);
432
433 for (i, col) in schema.columns.iter().enumerate() {
434 result.push(self.get_column(i, col.col_type).unwrap_or(SochValue::Null));
435 }
436
437 result
438 }
439
440 #[inline]
442 fn write_value(buf: &mut Vec<u8>, value: &SochValue) {
443 match value {
444 SochValue::Null => {}
445 SochValue::Bool(b) => buf.push(if *b { 1 } else { 0 }),
446 SochValue::Int(i) => buf.extend_from_slice(&i.to_le_bytes()),
447 SochValue::UInt(u) => buf.extend_from_slice(&u.to_le_bytes()),
448 SochValue::Float(f) => buf.extend_from_slice(&f.to_le_bytes()),
449 SochValue::Text(s) => {
450 buf.extend_from_slice(&(s.len() as u32).to_le_bytes());
451 buf.extend_from_slice(s.as_bytes());
452 }
453 SochValue::Binary(b) => {
454 buf.extend_from_slice(&(b.len() as u32).to_le_bytes());
455 buf.extend_from_slice(b);
456 }
457 _ => {} }
459 }
460
461 #[inline]
470 pub fn get_column(&self, idx: usize, col_type: PackedColumnType) -> Option<SochValue> {
471 if idx >= self.num_cols as usize {
472 return None;
473 }
474
475 let k = self.num_cols as usize;
476
477 let null_byte = self.data[idx / 8];
479 if (null_byte & (1 << (idx % 8))) != 0 {
480 return Some(SochValue::Null);
481 }
482
483 let offset_pos = self.null_bitmap_size + idx * 4;
485 let offset = u32::from_le_bytes([
486 self.data[offset_pos],
487 self.data[offset_pos + 1],
488 self.data[offset_pos + 2],
489 self.data[offset_pos + 3],
490 ]) as usize;
491
492 let data_start = self.null_bitmap_size + k * 4;
493 let value_start = data_start + offset;
494
495 if value_start >= self.data.len() {
496 return Some(SochValue::Null);
497 }
498
499 Some(Self::read_value(&self.data[value_start..], col_type))
500 }
501
502 #[inline]
504 fn read_value(data: &[u8], col_type: PackedColumnType) -> SochValue {
505 match col_type {
506 PackedColumnType::Null => SochValue::Null,
507 PackedColumnType::Bool => {
508 if data.is_empty() {
509 SochValue::Null
510 } else {
511 SochValue::Bool(data[0] != 0)
512 }
513 }
514 PackedColumnType::Int64 => {
515 if data.len() < 8 {
516 SochValue::Null
517 } else {
518 let bytes: [u8; 8] = data[..8].try_into().unwrap();
519 SochValue::Int(i64::from_le_bytes(bytes))
520 }
521 }
522 PackedColumnType::UInt64 => {
523 if data.len() < 8 {
524 SochValue::Null
525 } else {
526 let bytes: [u8; 8] = data[..8].try_into().unwrap();
527 SochValue::UInt(u64::from_le_bytes(bytes))
528 }
529 }
530 PackedColumnType::Float64 => {
531 if data.len() < 8 {
532 SochValue::Null
533 } else {
534 let bytes: [u8; 8] = data[..8].try_into().unwrap();
535 SochValue::Float(f64::from_le_bytes(bytes))
536 }
537 }
538 PackedColumnType::Text => {
539 if data.len() < 4 {
540 SochValue::Null
541 } else {
542 let len = u32::from_le_bytes(data[..4].try_into().unwrap()) as usize;
543 if data.len() < 4 + len {
544 SochValue::Null
545 } else {
546 match std::str::from_utf8(&data[4..4 + len]) {
547 Ok(s) => SochValue::Text(s.to_string()),
548 Err(_) => SochValue::Null,
549 }
550 }
551 }
552 }
553 PackedColumnType::Binary => {
554 if data.len() < 4 {
555 SochValue::Null
556 } else {
557 let len = u32::from_le_bytes(data[..4].try_into().unwrap()) as usize;
558 if data.len() < 4 + len {
559 SochValue::Null
560 } else {
561 SochValue::Binary(data[4..4 + len].to_vec())
562 }
563 }
564 }
565 }
566 }
567
568 #[inline]
570 pub fn get_by_name(&self, schema: &PackedTableSchema, name: &str) -> Option<SochValue> {
571 let idx = schema.column_index(name)?;
572 let col = schema.column(idx)?;
573 self.get_column(idx, col.col_type)
574 }
575
576 #[inline]
578 pub fn as_bytes(&self) -> &[u8] {
579 &self.data
580 }
581
582 #[inline]
584 pub fn into_bytes(self) -> Vec<u8> {
585 self.data
586 }
587
588 pub fn from_bytes(data: Vec<u8>, num_cols: usize) -> Result<Self> {
594 let null_bitmap_size = num_cols.div_ceil(8);
595 let min_size = null_bitmap_size + num_cols * 4;
596
597 if data.len() < min_size {
598 return Err(SochDBError::Internal(format!(
599 "PackedRow data too short: {} < {}",
600 data.len(),
601 min_size
602 )));
603 }
604
605 Ok(Self {
606 data,
607 num_cols: num_cols as u16,
608 null_bitmap_size,
609 })
610 }
611
612 pub fn unpack(&self, schema: &PackedTableSchema) -> HashMap<String, SochValue> {
614 let mut result = HashMap::with_capacity(schema.columns.len());
615
616 for (i, col) in schema.columns.iter().enumerate() {
617 if let Some(value) = self.get_column(i, col.col_type)
618 && (!matches!(value, SochValue::Null) || col.nullable)
619 {
620 result.insert(col.name.clone(), value);
621 }
622 }
623
624 result
625 }
626
627 #[inline]
629 pub fn num_columns(&self) -> usize {
630 self.num_cols as usize
631 }
632
633 #[inline]
635 pub fn size(&self) -> usize {
636 self.data.len()
637 }
638}
639
640pub struct PackedRowBuilder {
642 schema: PackedTableSchema,
643 values: HashMap<String, SochValue>,
644}
645
646impl PackedRowBuilder {
647 pub fn new(schema: PackedTableSchema) -> Self {
649 let capacity = schema.columns.len();
650 Self {
651 schema,
652 values: HashMap::with_capacity(capacity),
653 }
654 }
655
656 pub fn set(mut self, name: impl Into<String>, value: SochValue) -> Self {
658 self.values.insert(name.into(), value);
659 self
660 }
661
662 pub fn set_int(self, name: impl Into<String>, value: i64) -> Self {
664 self.set(name, SochValue::Int(value))
665 }
666
667 pub fn set_text(self, name: impl Into<String>, value: impl Into<String>) -> Self {
669 self.set(name, SochValue::Text(value.into()))
670 }
671
672 pub fn set_float(self, name: impl Into<String>, value: f64) -> Self {
674 self.set(name, SochValue::Float(value))
675 }
676
677 pub fn set_bool(self, name: impl Into<String>, value: bool) -> Self {
679 self.set(name, SochValue::Bool(value))
680 }
681
682 pub fn build(self) -> PackedRow {
684 PackedRow::pack(&self.schema, &self.values)
685 }
686}
687
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a single column definition.
    fn col(name: &str, col_type: PackedColumnType, nullable: bool) -> PackedColumnDef {
        PackedColumnDef {
            name: name.into(),
            col_type,
            nullable,
        }
    }

    /// Four-column schema shared by all tests: id, name, score, active.
    fn test_schema() -> PackedTableSchema {
        PackedTableSchema::new(
            "test",
            vec![
                col("id", PackedColumnType::Int64, false),
                col("name", PackedColumnType::Text, false),
                col("score", PackedColumnType::Float64, true),
                col("active", PackedColumnType::Bool, true),
            ],
        )
    }

    /// Builds a value map from `(column, value)` pairs.
    fn row_of(pairs: Vec<(&str, SochValue)>) -> HashMap<String, SochValue> {
        pairs
            .into_iter()
            .map(|(name, value)| (name.to_string(), value))
            .collect()
    }

    /// A row with every column populated.
    fn alice_row() -> HashMap<String, SochValue> {
        row_of(vec![
            ("id", SochValue::Int(42)),
            ("name", SochValue::Text("Alice".to_string())),
            ("score", SochValue::Float(98.5)),
            ("active", SochValue::Bool(true)),
        ])
    }

    #[test]
    fn test_pack_unpack_roundtrip() {
        let schema = test_schema();
        let packed = PackedRow::pack(&schema, &alice_row());

        // Positional access returns each column's original value.
        let expected = vec![
            (PackedColumnType::Int64, SochValue::Int(42)),
            (PackedColumnType::Text, SochValue::Text("Alice".to_string())),
            (PackedColumnType::Float64, SochValue::Float(98.5)),
            (PackedColumnType::Bool, SochValue::Bool(true)),
        ];
        for (idx, (col_type, value)) in expected.into_iter().enumerate() {
            assert_eq!(packed.get_column(idx, col_type), Some(value));
        }

        // Map-based unpacking agrees with positional access.
        let unpacked = packed.unpack(&schema);
        assert_eq!(unpacked.get("id"), Some(&SochValue::Int(42)));
        assert_eq!(
            unpacked.get("name"),
            Some(&SochValue::Text("Alice".to_string()))
        );
    }

    #[test]
    fn test_null_handling() {
        let schema = test_schema();
        // "score" and "active" are deliberately left out.
        let values = row_of(vec![
            ("id", SochValue::Int(1)),
            ("name", SochValue::Text("Bob".to_string())),
        ]);
        let packed = PackedRow::pack(&schema, &values);

        assert_eq!(
            packed.get_column(0, PackedColumnType::Int64),
            Some(SochValue::Int(1))
        );
        assert_eq!(
            packed.get_column(2, PackedColumnType::Float64),
            Some(SochValue::Null)
        );
        assert_eq!(
            packed.get_column(3, PackedColumnType::Bool),
            Some(SochValue::Null)
        );
    }

    #[test]
    fn test_bytes_roundtrip() {
        let schema = test_schema();
        let values = row_of(vec![
            ("id", SochValue::Int(100)),
            ("name", SochValue::Text("Test".to_string())),
        ]);

        let bytes = PackedRow::pack(&schema, &values).as_bytes().to_vec();
        let restored = PackedRow::from_bytes(bytes, schema.columns.len()).unwrap();

        assert_eq!(
            restored.get_column(0, PackedColumnType::Int64),
            Some(SochValue::Int(100))
        );
        assert_eq!(
            restored.get_column(1, PackedColumnType::Text),
            Some(SochValue::Text("Test".to_string()))
        );
    }

    #[test]
    fn test_builder() {
        let schema = test_schema();
        let packed = PackedRowBuilder::new(schema.clone())
            .set_int("id", 99)
            .set_text("name", "Builder Test")
            .set_float("score", 77.5)
            .set_bool("active", false)
            .build();

        let expected = vec![
            ("id", SochValue::Int(99)),
            ("name", SochValue::Text("Builder Test".to_string())),
            ("score", SochValue::Float(77.5)),
            ("active", SochValue::Bool(false)),
        ];
        for (name, value) in expected {
            assert_eq!(packed.get_by_name(&schema, name), Some(value));
        }
    }

    #[test]
    fn test_size_reduction() {
        let schema = test_schema();
        let packed = PackedRow::pack(&schema, &alice_row());

        assert!(packed.size() < 50, "Packed row should be compact");
    }
}