1use sochdb_core::{Result, SochDBError, SochValue};
57use std::collections::HashMap;
58
/// Storage type of a single column inside a packed row.
///
/// Every variant has a fixed one-byte tag; see [`PackedColumnType::to_byte`]
/// and [`PackedColumnType::from_byte`] for the mapping.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackedColumnType {
    /// Boolean column (tag `1`).
    Bool,
    /// Signed 64-bit integer column (tag `2`).
    Int64,
    /// Unsigned 64-bit integer column (tag `3`).
    UInt64,
    /// 64-bit floating point column (tag `4`).
    Float64,
    /// Text column (tag `5`).
    Text,
    /// Raw byte-string column (tag `6`).
    Binary,
    /// Column that only ever holds NULL (tag `0`).
    Null,
}

impl PackedColumnType {
    /// All variants, arranged so that a variant's index equals its byte tag.
    const BY_TAG: [Self; 7] = [
        Self::Null,
        Self::Bool,
        Self::Int64,
        Self::UInt64,
        Self::Float64,
        Self::Text,
        Self::Binary,
    ];

    /// Decodes a one-byte tag into a column type.
    ///
    /// Returns `None` for any tag outside `0..=6`.
    #[inline]
    pub fn from_byte(b: u8) -> Option<Self> {
        Self::BY_TAG.get(usize::from(b)).copied()
    }

    /// Encodes this column type as its one-byte tag (`0..=6`).
    #[inline]
    pub fn to_byte(self) -> u8 {
        match self {
            Self::Bool => 1,
            Self::Int64 => 2,
            Self::UInt64 => 3,
            Self::Float64 => 4,
            Self::Text => 5,
            Self::Binary => 6,
            Self::Null => 0,
        }
    }
}
101
102#[derive(Debug, Clone)]
104pub struct PackedColumnDef {
105 pub name: String,
106 pub col_type: PackedColumnType,
107 pub nullable: bool,
108}
109
110#[derive(Debug, Clone)]
112pub struct PackedTableSchema {
113 pub name: String,
114 pub columns: Vec<PackedColumnDef>,
115}
116
117impl PackedTableSchema {
118 pub fn new(name: impl Into<String>, columns: Vec<PackedColumnDef>) -> Self {
120 Self {
121 name: name.into(),
122 columns,
123 }
124 }
125
126 #[inline]
128 pub fn column_index(&self, name: &str) -> Option<usize> {
129 self.columns.iter().position(|c| c.name == name)
130 }
131
132 #[inline]
134 pub fn column(&self, idx: usize) -> Option<&PackedColumnDef> {
135 self.columns.get(idx)
136 }
137
138 #[inline]
140 pub fn num_columns(&self) -> usize {
141 self.columns.len()
142 }
143}
144
/// A row serialized into one contiguous byte buffer.
///
/// Buffer layout, in order: a null bitmap (one bit per column), a table of
/// little-endian `u32` offsets (one per column, relative to the start of the
/// value section), and finally the encoded values.
#[repr(C)]
pub struct PackedRow {
    /// The complete serialized row: bitmap, offset table, then values.
    data: Vec<u8>,
    /// Number of columns encoded in `data`.
    num_cols: u16,
    /// Length in bytes of the null bitmap at the front of `data`.
    null_bitmap_size: usize,
}
162
163impl PackedRow {
164 #[inline]
166 fn buffer_size(schema: &PackedTableSchema, values: &HashMap<String, SochValue>) -> usize {
167 let k = schema.columns.len();
168 let null_bitmap_size = k.div_ceil(8);
169 let offsets_size = k * 4;
170 let data_size: usize = schema
171 .columns
172 .iter()
173 .map(|col| Self::value_size(values.get(&col.name)))
174 .sum();
175 null_bitmap_size + offsets_size + data_size
176 }
177
178 #[inline]
180 fn value_size(value: Option<&SochValue>) -> usize {
181 match value {
182 None | Some(SochValue::Null) => 0,
183 Some(SochValue::Bool(_)) => 1,
184 Some(SochValue::Int(_) | SochValue::UInt(_) | SochValue::Float(_)) => 8,
185 Some(SochValue::Text(s)) => 4 + s.len(),
186 Some(SochValue::Binary(b)) => 4 + b.len(),
187 _ => 0, }
189 }
190
191 pub fn pack(schema: &PackedTableSchema, values: &HashMap<String, SochValue>) -> Self {
200 let k = schema.columns.len();
201 let null_bitmap_size = k.div_ceil(8);
202
203 let total_size = Self::buffer_size(schema, values);
205 let mut data = Vec::with_capacity(total_size);
206
207 let mut null_bits = vec![0u8; null_bitmap_size];
209 for (i, col) in schema.columns.iter().enumerate() {
210 match values.get(&col.name) {
211 None | Some(SochValue::Null) => {
212 null_bits[i / 8] |= 1 << (i % 8);
213 }
214 _ => {}
215 }
216 }
217 data.extend_from_slice(&null_bits);
218
219 let offsets_start = data.len();
221 data.resize(offsets_start + k * 4, 0);
222
223 let data_start = offsets_start + k * 4;
225
226 for (i, col) in schema.columns.iter().enumerate() {
227 let offset = (data.len() - data_start) as u32;
229 let offset_pos = offsets_start + i * 4;
230 data[offset_pos..offset_pos + 4].copy_from_slice(&offset.to_le_bytes());
231
232 if let Some(value) = values.get(&col.name) {
234 Self::write_value(&mut data, value);
235 }
236 }
237
238 Self {
239 data,
240 num_cols: k as u16,
241 null_bitmap_size,
242 }
243 }
244
245 #[inline]
256 pub fn pack_slice(schema: &PackedTableSchema, values: &[Option<&SochValue>]) -> Self {
257 let k = schema.columns.len();
258 debug_assert_eq!(
259 values.len(),
260 k,
261 "values slice must match schema column count"
262 );
263
264 let null_bitmap_size = k.div_ceil(8);
265 let total_size = Self::buffer_size_slice(schema, values);
266
267 if total_size <= 512 {
269 Self::pack_slice_small(schema, values, k, null_bitmap_size, total_size)
270 } else {
271 Self::pack_slice_large(schema, values, k, null_bitmap_size, total_size)
272 }
273 }
274
275 #[inline]
277 fn pack_slice_small(
278 _schema: &PackedTableSchema,
279 values: &[Option<&SochValue>],
280 k: usize,
281 null_bitmap_size: usize,
282 total_size: usize,
283 ) -> Self {
284 let mut stack_buf = [0u8; 512];
286 let buf = &mut stack_buf[..total_size];
287
288 for (i, val) in values.iter().enumerate() {
290 match val {
291 None | Some(SochValue::Null) => {
292 buf[i / 8] |= 1 << (i % 8);
293 }
294 _ => {}
295 }
296 }
297
298 let offsets_start = null_bitmap_size;
300 let data_start = offsets_start + k * 4;
301 let mut data_pos = data_start;
302
303 for (i, val) in values.iter().enumerate() {
304 let offset = (data_pos - data_start) as u32;
305 let offset_pos = offsets_start + i * 4;
306 buf[offset_pos..offset_pos + 4].copy_from_slice(&offset.to_le_bytes());
307
308 if let Some(value) = val {
309 data_pos += Self::write_value_to_slice(&mut buf[data_pos..], value);
310 }
311 }
312
313 Self {
314 data: buf[..total_size].to_vec(),
315 num_cols: k as u16,
316 null_bitmap_size,
317 }
318 }
319
320 #[inline]
322 fn pack_slice_large(
323 _schema: &PackedTableSchema,
324 values: &[Option<&SochValue>],
325 k: usize,
326 null_bitmap_size: usize,
327 total_size: usize,
328 ) -> Self {
329 let mut data = Vec::with_capacity(total_size);
331
332 let mut null_bits = vec![0u8; null_bitmap_size];
334 for (i, val) in values.iter().enumerate() {
335 match val {
336 None | Some(SochValue::Null) => {
337 null_bits[i / 8] |= 1 << (i % 8);
338 }
339 _ => {}
340 }
341 }
342 data.extend_from_slice(&null_bits);
343
344 let offsets_start = data.len();
346 data.resize(offsets_start + k * 4, 0);
347
348 let data_start = offsets_start + k * 4;
350
351 for (i, val) in values.iter().enumerate() {
352 let offset = (data.len() - data_start) as u32;
353 let offset_pos = offsets_start + i * 4;
354 data[offset_pos..offset_pos + 4].copy_from_slice(&offset.to_le_bytes());
355
356 if let Some(value) = val {
357 Self::write_value(&mut data, value);
358 }
359 }
360
361 Self {
362 data,
363 num_cols: k as u16,
364 null_bitmap_size,
365 }
366 }
367
368 #[inline]
370 fn write_value_to_slice(buf: &mut [u8], value: &SochValue) -> usize {
371 match value {
372 SochValue::Null => 0,
373 SochValue::Bool(b) => {
374 buf[0] = if *b { 1 } else { 0 };
375 1
376 }
377 SochValue::Int(i) => {
378 buf[..8].copy_from_slice(&i.to_le_bytes());
379 8
380 }
381 SochValue::UInt(u) => {
382 buf[..8].copy_from_slice(&u.to_le_bytes());
383 8
384 }
385 SochValue::Float(f) => {
386 buf[..8].copy_from_slice(&f.to_bits().to_le_bytes());
387 8
388 }
389 SochValue::Text(s) => {
390 let len = s.len() as u32;
391 buf[..4].copy_from_slice(&len.to_le_bytes());
392 buf[4..4 + s.len()].copy_from_slice(s.as_bytes());
393 4 + s.len()
394 }
395 SochValue::Binary(b) => {
396 let len = b.len() as u32;
397 buf[..4].copy_from_slice(&len.to_le_bytes());
398 buf[4..4 + b.len()].copy_from_slice(b);
399 4 + b.len()
400 }
401 _ => 0,
402 }
403 }
404
405 #[inline]
407 fn buffer_size_slice(schema: &PackedTableSchema, values: &[Option<&SochValue>]) -> usize {
408 let k = schema.columns.len();
409 let null_bitmap_size = k.div_ceil(8);
410 let offsets_size = k * 4;
411
412 let data_size: usize = values
413 .iter()
414 .map(|v| match v {
415 None | Some(SochValue::Null) => 0,
416 Some(SochValue::Bool(_)) => 1,
417 Some(SochValue::Int(_) | SochValue::UInt(_) | SochValue::Float(_)) => 8,
418 Some(SochValue::Text(s)) => 4 + s.len(),
419 Some(SochValue::Binary(b)) => 4 + b.len(),
420 _ => 0,
421 })
422 .sum();
423
424 null_bitmap_size + offsets_size + data_size
425 }
426
427 #[inline]
432 pub fn unpack_to_vec(&self, schema: &PackedTableSchema) -> Vec<SochValue> {
433 let k = schema.columns.len();
434 let mut result = Vec::with_capacity(k);
435
436 for (i, col) in schema.columns.iter().enumerate() {
437 result.push(self.get_column(i, col.col_type).unwrap_or(SochValue::Null));
438 }
439
440 result
441 }
442
443 #[inline]
445 fn write_value(buf: &mut Vec<u8>, value: &SochValue) {
446 match value {
447 SochValue::Null => {}
448 SochValue::Bool(b) => buf.push(if *b { 1 } else { 0 }),
449 SochValue::Int(i) => buf.extend_from_slice(&i.to_le_bytes()),
450 SochValue::UInt(u) => buf.extend_from_slice(&u.to_le_bytes()),
451 SochValue::Float(f) => buf.extend_from_slice(&f.to_le_bytes()),
452 SochValue::Text(s) => {
453 buf.extend_from_slice(&(s.len() as u32).to_le_bytes());
454 buf.extend_from_slice(s.as_bytes());
455 }
456 SochValue::Binary(b) => {
457 buf.extend_from_slice(&(b.len() as u32).to_le_bytes());
458 buf.extend_from_slice(b);
459 }
460 _ => {} }
462 }
463
464 #[inline]
473 pub fn get_column(&self, idx: usize, col_type: PackedColumnType) -> Option<SochValue> {
474 if idx >= self.num_cols as usize {
475 return None;
476 }
477
478 let k = self.num_cols as usize;
479
480 let null_byte = self.data[idx / 8];
482 if (null_byte & (1 << (idx % 8))) != 0 {
483 return Some(SochValue::Null);
484 }
485
486 let offset_pos = self.null_bitmap_size + idx * 4;
488 let offset = u32::from_le_bytes([
489 self.data[offset_pos],
490 self.data[offset_pos + 1],
491 self.data[offset_pos + 2],
492 self.data[offset_pos + 3],
493 ]) as usize;
494
495 let data_start = self.null_bitmap_size + k * 4;
496 let value_start = data_start + offset;
497
498 if value_start >= self.data.len() {
499 return Some(SochValue::Null);
500 }
501
502 Some(Self::read_value(&self.data[value_start..], col_type))
503 }
504
505 #[inline]
507 fn read_value(data: &[u8], col_type: PackedColumnType) -> SochValue {
508 match col_type {
509 PackedColumnType::Null => SochValue::Null,
510 PackedColumnType::Bool => {
511 if data.is_empty() {
512 SochValue::Null
513 } else {
514 SochValue::Bool(data[0] != 0)
515 }
516 }
517 PackedColumnType::Int64 => {
518 if data.len() < 8 {
519 SochValue::Null
520 } else {
521 let bytes: [u8; 8] = data[..8].try_into().unwrap();
522 SochValue::Int(i64::from_le_bytes(bytes))
523 }
524 }
525 PackedColumnType::UInt64 => {
526 if data.len() < 8 {
527 SochValue::Null
528 } else {
529 let bytes: [u8; 8] = data[..8].try_into().unwrap();
530 SochValue::UInt(u64::from_le_bytes(bytes))
531 }
532 }
533 PackedColumnType::Float64 => {
534 if data.len() < 8 {
535 SochValue::Null
536 } else {
537 let bytes: [u8; 8] = data[..8].try_into().unwrap();
538 SochValue::Float(f64::from_le_bytes(bytes))
539 }
540 }
541 PackedColumnType::Text => {
542 if data.len() < 4 {
543 SochValue::Null
544 } else {
545 let len = u32::from_le_bytes(data[..4].try_into().unwrap()) as usize;
546 if data.len() < 4 + len {
547 SochValue::Null
548 } else {
549 match std::str::from_utf8(&data[4..4 + len]) {
550 Ok(s) => SochValue::Text(s.to_string()),
551 Err(_) => SochValue::Null,
552 }
553 }
554 }
555 }
556 PackedColumnType::Binary => {
557 if data.len() < 4 {
558 SochValue::Null
559 } else {
560 let len = u32::from_le_bytes(data[..4].try_into().unwrap()) as usize;
561 if data.len() < 4 + len {
562 SochValue::Null
563 } else {
564 SochValue::Binary(data[4..4 + len].to_vec())
565 }
566 }
567 }
568 }
569 }
570
571 #[inline]
573 pub fn get_by_name(&self, schema: &PackedTableSchema, name: &str) -> Option<SochValue> {
574 let idx = schema.column_index(name)?;
575 let col = schema.column(idx)?;
576 self.get_column(idx, col.col_type)
577 }
578
579 #[inline]
581 pub fn as_bytes(&self) -> &[u8] {
582 &self.data
583 }
584
585 #[inline]
587 pub fn into_bytes(self) -> Vec<u8> {
588 self.data
589 }
590
591 pub fn from_bytes(data: Vec<u8>, num_cols: usize) -> Result<Self> {
597 let null_bitmap_size = num_cols.div_ceil(8);
598 let min_size = null_bitmap_size + num_cols * 4;
599
600 if data.len() < min_size {
601 return Err(SochDBError::Internal(format!(
602 "PackedRow data too short: {} < {}",
603 data.len(),
604 min_size
605 )));
606 }
607
608 Ok(Self {
609 data,
610 num_cols: num_cols as u16,
611 null_bitmap_size,
612 })
613 }
614
615 pub fn unpack(&self, schema: &PackedTableSchema) -> HashMap<String, SochValue> {
617 let mut result = HashMap::with_capacity(schema.columns.len());
618
619 for (i, col) in schema.columns.iter().enumerate() {
620 if let Some(value) = self.get_column(i, col.col_type)
621 && (!matches!(value, SochValue::Null) || col.nullable)
622 {
623 result.insert(col.name.clone(), value);
624 }
625 }
626
627 result
628 }
629
630 #[inline]
632 pub fn num_columns(&self) -> usize {
633 self.num_cols as usize
634 }
635
636 #[inline]
638 pub fn size(&self) -> usize {
639 self.data.len()
640 }
641}
642
643pub struct PackedRowBuilder {
645 schema: PackedTableSchema,
646 values: HashMap<String, SochValue>,
647}
648
649impl PackedRowBuilder {
650 pub fn new(schema: PackedTableSchema) -> Self {
652 let capacity = schema.columns.len();
653 Self {
654 schema,
655 values: HashMap::with_capacity(capacity),
656 }
657 }
658
659 pub fn set(mut self, name: impl Into<String>, value: SochValue) -> Self {
661 self.values.insert(name.into(), value);
662 self
663 }
664
665 pub fn set_int(self, name: impl Into<String>, value: i64) -> Self {
667 self.set(name, SochValue::Int(value))
668 }
669
670 pub fn set_text(self, name: impl Into<String>, value: impl Into<String>) -> Self {
672 self.set(name, SochValue::Text(value.into()))
673 }
674
675 pub fn set_float(self, name: impl Into<String>, value: f64) -> Self {
677 self.set(name, SochValue::Float(value))
678 }
679
680 pub fn set_bool(self, name: impl Into<String>, value: bool) -> Self {
682 self.set(name, SochValue::Bool(value))
683 }
684
685 pub fn build(self) -> PackedRow {
687 PackedRow::pack(&self.schema, &self.values)
688 }
689}
690
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for one column definition.
    fn col(name: &str, col_type: PackedColumnType, nullable: bool) -> PackedColumnDef {
        PackedColumnDef {
            name: name.into(),
            col_type,
            nullable,
        }
    }

    /// Four-column schema shared by all tests: id, name, score, active.
    fn test_schema() -> PackedTableSchema {
        PackedTableSchema::new(
            "test",
            vec![
                col("id", PackedColumnType::Int64, false),
                col("name", PackedColumnType::Text, false),
                col("score", PackedColumnType::Float64, true),
                col("active", PackedColumnType::Bool, true),
            ],
        )
    }

    /// Builds a name-keyed value map from `(name, value)` pairs.
    fn values_of(entries: Vec<(&str, SochValue)>) -> HashMap<String, SochValue> {
        entries
            .into_iter()
            .map(|(name, value)| (name.to_string(), value))
            .collect()
    }

    /// A row with every column of `test_schema` populated.
    fn full_row_values() -> HashMap<String, SochValue> {
        values_of(vec![
            ("id", SochValue::Int(42)),
            ("name", SochValue::Text("Alice".to_string())),
            ("score", SochValue::Float(98.5)),
            ("active", SochValue::Bool(true)),
        ])
    }

    #[test]
    fn test_pack_unpack_roundtrip() {
        let schema = test_schema();
        let packed = PackedRow::pack(&schema, &full_row_values());

        // Positional access returns each value as packed.
        assert_eq!(
            packed.get_column(0, PackedColumnType::Int64),
            Some(SochValue::Int(42))
        );
        assert_eq!(
            packed.get_column(1, PackedColumnType::Text),
            Some(SochValue::Text("Alice".to_string()))
        );
        assert_eq!(
            packed.get_column(2, PackedColumnType::Float64),
            Some(SochValue::Float(98.5))
        );
        assert_eq!(
            packed.get_column(3, PackedColumnType::Bool),
            Some(SochValue::Bool(true))
        );

        // Unpacking to a map exposes the same values by name.
        let unpacked = packed.unpack(&schema);
        assert_eq!(unpacked.get("id"), Some(&SochValue::Int(42)));
        assert_eq!(
            unpacked.get("name"),
            Some(&SochValue::Text("Alice".to_string()))
        );
    }

    #[test]
    fn test_null_handling() {
        let schema = test_schema();
        // "score" and "active" are deliberately left unset.
        let values = values_of(vec![
            ("id", SochValue::Int(1)),
            ("name", SochValue::Text("Bob".to_string())),
        ]);
        let packed = PackedRow::pack(&schema, &values);

        assert_eq!(
            packed.get_column(0, PackedColumnType::Int64),
            Some(SochValue::Int(1))
        );
        assert_eq!(
            packed.get_column(2, PackedColumnType::Float64),
            Some(SochValue::Null)
        );
        assert_eq!(
            packed.get_column(3, PackedColumnType::Bool),
            Some(SochValue::Null)
        );
    }

    #[test]
    fn test_bytes_roundtrip() {
        let schema = test_schema();
        let values = values_of(vec![
            ("id", SochValue::Int(100)),
            ("name", SochValue::Text("Test".to_string())),
        ]);

        let bytes = PackedRow::pack(&schema, &values).as_bytes().to_vec();
        let restored = PackedRow::from_bytes(bytes, schema.columns.len()).unwrap();

        assert_eq!(
            restored.get_column(0, PackedColumnType::Int64),
            Some(SochValue::Int(100))
        );
        assert_eq!(
            restored.get_column(1, PackedColumnType::Text),
            Some(SochValue::Text("Test".to_string()))
        );
    }

    #[test]
    fn test_builder() {
        let schema = test_schema();
        let packed = PackedRowBuilder::new(schema.clone())
            .set_int("id", 99)
            .set_text("name", "Builder Test")
            .set_float("score", 77.5)
            .set_bool("active", false)
            .build();

        assert_eq!(packed.get_by_name(&schema, "id"), Some(SochValue::Int(99)));
        assert_eq!(
            packed.get_by_name(&schema, "name"),
            Some(SochValue::Text("Builder Test".to_string()))
        );
        assert_eq!(
            packed.get_by_name(&schema, "score"),
            Some(SochValue::Float(77.5))
        );
        assert_eq!(
            packed.get_by_name(&schema, "active"),
            Some(SochValue::Bool(false))
        );
    }

    #[test]
    fn test_size_reduction() {
        let schema = test_schema();
        let packed = PackedRow::pack(&schema, &full_row_values());

        assert!(packed.size() < 50, "Packed row should be compact");
    }
}