use std::collections::{BTreeMap, HashMap};
use std::sync::atomic::{AtomicU64, Ordering};
42
/// Packed per-row validity flags for a column.
///
/// Row `i` is stored in bit `i % 64` of word `i / 64`; a set bit means the
/// row holds a value, a clear bit means the row is null. The default value is
/// an empty bitmap.
#[derive(Debug, Clone, Default)]
pub struct ValidityBitmap {
    /// Backing words, one per 64 rows (rounded up).
    bits: Vec<u64>,
    /// Number of tracked rows whose bit is clear.
    null_count: usize,
    /// Number of rows tracked by the bitmap.
    len: usize,
}

impl ValidityBitmap {
    /// Number of rows stored in each backing word.
    const WORD_BITS: usize = 64;

    /// Splits a row index into its backing-word index and single-bit mask.
    #[inline]
    fn locate(idx: usize) -> (usize, u64) {
        (idx / Self::WORD_BITS, 1u64 << (idx % Self::WORD_BITS))
    }

    /// Creates a bitmap of `len` rows that are all valid.
    pub fn new_all_valid(len: usize) -> Self {
        let mut bits = vec![u64::MAX; len.div_ceil(Self::WORD_BITS)];
        // Keep the bits past `len` clear so the storage never claims validity
        // for rows that have not been pushed yet.
        let used_in_last = len % Self::WORD_BITS;
        if used_in_last != 0 {
            if let Some(last) = bits.last_mut() {
                *last = (1u64 << used_in_last) - 1;
            }
        }
        Self {
            bits,
            null_count: 0,
            len,
        }
    }

    /// Creates a bitmap of `len` rows that are all null.
    pub fn new_all_null(len: usize) -> Self {
        Self {
            bits: vec![0; len.div_ceil(Self::WORD_BITS)],
            null_count: len,
            len,
        }
    }

    /// Returns `true` when row `idx` exists and is valid.
    ///
    /// Out-of-range indices are reported as not valid.
    #[inline]
    pub fn is_valid(&self, idx: usize) -> bool {
        if idx >= self.len {
            return false;
        }
        let (word, mask) = Self::locate(idx);
        self.bits[word] & mask != 0
    }

    /// Marks row `idx` as valid; out-of-range indices are ignored.
    #[inline]
    pub fn set_valid(&mut self, idx: usize) {
        if idx >= self.len {
            return;
        }
        let (word, mask) = Self::locate(idx);
        // Only a null -> valid transition changes the null count.
        if self.bits[word] & mask == 0 {
            self.bits[word] |= mask;
            self.null_count = self.null_count.saturating_sub(1);
        }
    }

    /// Marks row `idx` as null; out-of-range indices are ignored.
    #[inline]
    pub fn set_null(&mut self, idx: usize) {
        if idx >= self.len {
            return;
        }
        let (word, mask) = Self::locate(idx);
        // Only a valid -> null transition changes the null count.
        if self.bits[word] & mask != 0 {
            self.bits[word] &= !mask;
            self.null_count = self.null_count.saturating_add(1);
        }
    }

    /// Appends one row with the given validity.
    ///
    /// The new row's bit is written explicitly in both cases, and the null
    /// count only changes when a null row is appended.
    pub fn push(&mut self, valid: bool) {
        let (word, mask) = Self::locate(self.len);
        if self.bits.len() <= word {
            self.bits.resize(word + 1, 0);
        }
        if valid {
            self.bits[word] |= mask;
        } else {
            self.bits[word] &= !mask;
            self.null_count += 1;
        }
        self.len += 1;
    }

    /// Number of null rows.
    pub fn null_count(&self) -> usize {
        self.null_count
    }

    /// Number of rows tracked by the bitmap.
    pub fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` when the bitmap tracks no rows.
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }
}
144
/// Running summary statistics for a single column.
#[derive(Debug, Clone, Default)]
pub struct ColumnStats {
    /// Smallest value passed to [`ColumnStats::update_i64`], if any.
    pub min_i64: Option<i64>,
    /// Largest value passed to [`ColumnStats::update_i64`], if any.
    pub max_i64: Option<i64>,
    /// Lower bound accumulated by [`ColumnStats::update_f64`], if any.
    pub min_f64: Option<f64>,
    /// Upper bound accumulated by [`ColumnStats::update_f64`], if any.
    pub max_f64: Option<f64>,
    /// Distinct-value counter; none of the methods on this type modify it.
    pub distinct_count: u64,
    /// Number of rows recorded through [`ColumnStats::update_null`].
    pub null_count: u64,
    /// Number of rows recorded by the `update_*` methods.
    pub row_count: u64,
}

impl ColumnStats {
    /// Records one integer row, widening the integer bounds as needed.
    pub fn update_i64(&mut self, value: i64) {
        let lowest = match self.min_i64 {
            Some(current) => current.min(value),
            None => value,
        };
        let highest = match self.max_i64 {
            Some(current) => current.max(value),
            None => value,
        };
        self.min_i64 = Some(lowest);
        self.max_i64 = Some(highest);
        self.row_count += 1;
    }

    /// Records one floating-point row, combining it with the existing bounds
    /// via `f64::min` / `f64::max`.
    pub fn update_f64(&mut self, value: f64) {
        let lowest = match self.min_f64 {
            Some(current) => current.min(value),
            None => value,
        };
        let highest = match self.max_f64 {
            Some(current) => current.max(value),
            None => value,
        };
        self.min_f64 = Some(lowest);
        self.max_f64 = Some(highest);
        self.row_count += 1;
    }

    /// Records one null row.
    pub fn update_null(&mut self) {
        self.row_count += 1;
        self.null_count += 1;
    }
}
182
183#[derive(Debug, Clone)]
185pub enum TypedColumn {
186 Int64 {
188 values: Vec<i64>,
189 validity: ValidityBitmap,
190 stats: ColumnStats,
191 },
192 UInt64 {
194 values: Vec<u64>,
195 validity: ValidityBitmap,
196 stats: ColumnStats,
197 },
198 Float64 {
200 values: Vec<f64>,
201 validity: ValidityBitmap,
202 stats: ColumnStats,
203 },
204 Text {
206 offsets: Vec<u32>,
208 data: Vec<u8>,
210 validity: ValidityBitmap,
211 stats: ColumnStats,
212 },
213 Binary {
215 offsets: Vec<u32>,
216 data: Vec<u8>,
217 validity: ValidityBitmap,
218 stats: ColumnStats,
219 },
220 Bool {
222 values: Vec<u64>,
224 validity: ValidityBitmap,
225 stats: ColumnStats,
226 len: usize,
227 },
228}
229
230impl TypedColumn {
231 pub fn new_int64() -> Self {
233 TypedColumn::Int64 {
234 values: Vec::new(),
235 validity: ValidityBitmap::default(),
236 stats: ColumnStats::default(),
237 }
238 }
239
240 pub fn new_uint64() -> Self {
242 TypedColumn::UInt64 {
243 values: Vec::new(),
244 validity: ValidityBitmap::default(),
245 stats: ColumnStats::default(),
246 }
247 }
248
249 pub fn new_float64() -> Self {
251 TypedColumn::Float64 {
252 values: Vec::new(),
253 validity: ValidityBitmap::default(),
254 stats: ColumnStats::default(),
255 }
256 }
257
258 pub fn new_text() -> Self {
260 TypedColumn::Text {
261 offsets: vec![0], data: Vec::new(),
263 validity: ValidityBitmap::default(),
264 stats: ColumnStats::default(),
265 }
266 }
267
268 pub fn new_binary() -> Self {
270 TypedColumn::Binary {
271 offsets: vec![0],
272 data: Vec::new(),
273 validity: ValidityBitmap::default(),
274 stats: ColumnStats::default(),
275 }
276 }
277
278 pub fn new_bool() -> Self {
280 TypedColumn::Bool {
281 values: Vec::new(),
282 validity: ValidityBitmap::default(),
283 stats: ColumnStats::default(),
284 len: 0,
285 }
286 }
287
288 pub fn len(&self) -> usize {
290 match self {
291 TypedColumn::Int64 { values, .. } => values.len(),
292 TypedColumn::UInt64 { values, .. } => values.len(),
293 TypedColumn::Float64 { values, .. } => values.len(),
294 TypedColumn::Text { offsets, .. } => offsets.len().saturating_sub(1),
295 TypedColumn::Binary { offsets, .. } => offsets.len().saturating_sub(1),
296 TypedColumn::Bool { len, .. } => *len,
297 }
298 }
299
300 pub fn is_empty(&self) -> bool {
302 self.len() == 0
303 }
304
305 pub fn push_i64(&mut self, value: Option<i64>) {
307 if let TypedColumn::Int64 {
308 values,
309 validity,
310 stats,
311 } = self
312 {
313 match value {
314 Some(v) => {
315 values.push(v);
316 validity.push(true);
317 stats.update_i64(v);
318 }
319 None => {
320 values.push(0); validity.push(false);
322 stats.update_null();
323 }
324 }
325 }
326 }
327
328 pub fn push_u64(&mut self, value: Option<u64>) {
330 if let TypedColumn::UInt64 {
331 values,
332 validity,
333 stats,
334 } = self
335 {
336 match value {
337 Some(v) => {
338 values.push(v);
339 validity.push(true);
340 stats.update_i64(v as i64);
341 }
342 None => {
343 values.push(0);
344 validity.push(false);
345 stats.update_null();
346 }
347 }
348 }
349 }
350
351 pub fn push_f64(&mut self, value: Option<f64>) {
353 if let TypedColumn::Float64 {
354 values,
355 validity,
356 stats,
357 } = self
358 {
359 match value {
360 Some(v) => {
361 values.push(v);
362 validity.push(true);
363 stats.update_f64(v);
364 }
365 None => {
366 values.push(0.0);
367 validity.push(false);
368 stats.update_null();
369 }
370 }
371 }
372 }
373
374 pub fn push_text(&mut self, value: Option<&str>) {
376 if let TypedColumn::Text {
377 offsets,
378 data,
379 validity,
380 stats,
381 } = self
382 {
383 match value {
384 Some(s) => {
385 data.extend_from_slice(s.as_bytes());
386 offsets.push(data.len() as u32);
387 validity.push(true);
388 stats.row_count += 1;
389 }
390 None => {
391 offsets.push(data.len() as u32);
392 validity.push(false);
393 stats.update_null();
394 }
395 }
396 }
397 }
398
399 pub fn push_binary(&mut self, value: Option<&[u8]>) {
401 if let TypedColumn::Binary {
402 offsets,
403 data,
404 validity,
405 stats,
406 } = self
407 {
408 match value {
409 Some(b) => {
410 data.extend_from_slice(b);
411 offsets.push(data.len() as u32);
412 validity.push(true);
413 stats.row_count += 1;
414 }
415 None => {
416 offsets.push(data.len() as u32);
417 validity.push(false);
418 stats.update_null();
419 }
420 }
421 }
422 }
423
424 pub fn push_bool(&mut self, value: Option<bool>) {
426 if let TypedColumn::Bool {
427 values,
428 validity,
429 stats,
430 len,
431 } = self
432 {
433 let idx = *len;
434 *len += 1;
435 let num_words = (*len).div_ceil(64);
436 while values.len() < num_words {
437 values.push(0);
438 }
439 match value {
440 Some(v) => {
441 if v {
442 let word = idx / 64;
443 let bit = idx % 64;
444 values[word] |= 1 << bit;
445 }
446 validity.push(true);
447 stats.row_count += 1;
448 }
449 None => {
450 validity.push(false);
451 stats.update_null();
452 }
453 }
454 }
455 }
456
457 pub fn get_i64(&self, idx: usize) -> Option<i64> {
459 if let TypedColumn::Int64 {
460 values, validity, ..
461 } = self
462 && idx < values.len()
463 && validity.is_valid(idx)
464 {
465 return Some(values[idx]);
466 }
467 None
468 }
469
470 pub fn get_u64(&self, idx: usize) -> Option<u64> {
472 if let TypedColumn::UInt64 {
473 values, validity, ..
474 } = self
475 && idx < values.len()
476 && validity.is_valid(idx)
477 {
478 return Some(values[idx]);
479 }
480 None
481 }
482
483 pub fn get_f64(&self, idx: usize) -> Option<f64> {
485 if let TypedColumn::Float64 {
486 values, validity, ..
487 } = self
488 && idx < values.len()
489 && validity.is_valid(idx)
490 {
491 return Some(values[idx]);
492 }
493 None
494 }
495
496 pub fn get_text(&self, idx: usize) -> Option<&str> {
498 if let TypedColumn::Text {
499 offsets,
500 data,
501 validity,
502 ..
503 } = self
504 && idx + 1 < offsets.len()
505 && validity.is_valid(idx)
506 {
507 let start = offsets[idx] as usize;
508 let end = offsets[idx + 1] as usize;
509 return std::str::from_utf8(&data[start..end]).ok();
510 }
511 None
512 }
513
514 pub fn get_binary(&self, idx: usize) -> Option<&[u8]> {
516 if let TypedColumn::Binary {
517 offsets,
518 data,
519 validity,
520 ..
521 } = self
522 && idx + 1 < offsets.len()
523 && validity.is_valid(idx)
524 {
525 let start = offsets[idx] as usize;
526 let end = offsets[idx + 1] as usize;
527 return Some(&data[start..end]);
528 }
529 None
530 }
531
532 pub fn get_bool(&self, idx: usize) -> Option<bool> {
534 if let TypedColumn::Bool {
535 values,
536 validity,
537 len,
538 ..
539 } = self
540 && idx < *len
541 && validity.is_valid(idx)
542 {
543 let word = idx / 64;
544 let bit = idx % 64;
545 return Some((values[word] >> bit) & 1 == 1);
546 }
547 None
548 }
549
550 pub fn is_null(&self, idx: usize) -> bool {
552 match self {
553 TypedColumn::Int64 { validity, .. } => !validity.is_valid(idx),
554 TypedColumn::UInt64 { validity, .. } => !validity.is_valid(idx),
555 TypedColumn::Float64 { validity, .. } => !validity.is_valid(idx),
556 TypedColumn::Text { validity, .. } => !validity.is_valid(idx),
557 TypedColumn::Binary { validity, .. } => !validity.is_valid(idx),
558 TypedColumn::Bool { validity, .. } => !validity.is_valid(idx),
559 }
560 }
561
562 pub fn stats(&self) -> &ColumnStats {
564 match self {
565 TypedColumn::Int64 { stats, .. } => stats,
566 TypedColumn::UInt64 { stats, .. } => stats,
567 TypedColumn::Float64 { stats, .. } => stats,
568 TypedColumn::Text { stats, .. } => stats,
569 TypedColumn::Binary { stats, .. } => stats,
570 TypedColumn::Bool { stats, .. } => stats,
571 }
572 }
573
574 #[inline]
576 pub fn sum_i64(&self) -> i64 {
577 if let TypedColumn::Int64 {
578 values, validity, ..
579 } = self
580 {
581 if validity.null_count() == 0 {
583 values.iter().sum()
584 } else {
585 values
587 .iter()
588 .enumerate()
589 .filter(|(i, _)| validity.is_valid(*i))
590 .map(|(_, v)| *v)
591 .sum()
592 }
593 } else {
594 0
595 }
596 }
597
598 #[inline]
600 pub fn sum_f64(&self) -> f64 {
601 if let TypedColumn::Float64 {
602 values, validity, ..
603 } = self
604 {
605 if validity.null_count() == 0 {
606 values.iter().sum()
607 } else {
608 values
609 .iter()
610 .enumerate()
611 .filter(|(i, _)| validity.is_valid(*i))
612 .map(|(_, v)| *v)
613 .sum()
614 }
615 } else {
616 0.0
617 }
618 }
619
620 pub fn memory_size(&self) -> usize {
622 match self {
623 TypedColumn::Int64 {
624 values, validity, ..
625 } => values.len() * 8 + validity.bits.len() * 8,
626 TypedColumn::UInt64 {
627 values, validity, ..
628 } => values.len() * 8 + validity.bits.len() * 8,
629 TypedColumn::Float64 {
630 values, validity, ..
631 } => values.len() * 8 + validity.bits.len() * 8,
632 TypedColumn::Text {
633 offsets,
634 data,
635 validity,
636 ..
637 } => offsets.len() * 4 + data.len() + validity.bits.len() * 8,
638 TypedColumn::Binary {
639 offsets,
640 data,
641 validity,
642 ..
643 } => offsets.len() * 4 + data.len() + validity.bits.len() * 8,
644 TypedColumn::Bool {
645 values, validity, ..
646 } => values.len() * 8 + validity.bits.len() * 8,
647 }
648 }
649}
650
651#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
653pub enum ColumnType {
654 Int64,
655 UInt64,
656 Float64,
657 Text,
658 Binary,
659 Bool,
660}
661
662impl ColumnType {
663 pub fn create_column(&self) -> TypedColumn {
665 match self {
666 ColumnType::Int64 => TypedColumn::new_int64(),
667 ColumnType::UInt64 => TypedColumn::new_uint64(),
668 ColumnType::Float64 => TypedColumn::new_float64(),
669 ColumnType::Text => TypedColumn::new_text(),
670 ColumnType::Binary => TypedColumn::new_binary(),
671 ColumnType::Bool => TypedColumn::new_bool(),
672 }
673 }
674}
675
676#[derive(Debug, Clone)]
678pub struct ColumnChunk {
679 pub name: String,
681 pub column_type: ColumnType,
683 pub data: TypedColumn,
685}
686
687impl ColumnChunk {
688 pub fn new(name: impl Into<String>, column_type: ColumnType) -> Self {
690 Self {
691 name: name.into(),
692 column_type,
693 data: column_type.create_column(),
694 }
695 }
696
697 pub fn stats(&self) -> &ColumnStats {
699 self.data.stats()
700 }
701}
702
703#[derive(Debug)]
705pub struct ColumnarTable {
706 pub name: String,
708 columns: HashMap<String, ColumnChunk>,
710 column_order: Vec<String>,
712 primary_key: Option<String>,
714 pk_index: std::collections::BTreeMap<i64, u32>,
716 row_count: AtomicU64,
718}
719
720impl Clone for ColumnarTable {
721 fn clone(&self) -> Self {
722 Self {
723 name: self.name.clone(),
724 columns: self.columns.clone(),
725 column_order: self.column_order.clone(),
726 primary_key: self.primary_key.clone(),
727 pk_index: self.pk_index.clone(),
728 row_count: AtomicU64::new(self.row_count.load(std::sync::atomic::Ordering::Relaxed)),
729 }
730 }
731}
732
733impl ColumnarTable {
734 pub fn new(name: impl Into<String>) -> Self {
736 Self {
737 name: name.into(),
738 columns: HashMap::new(),
739 column_order: Vec::new(),
740 primary_key: None,
741 pk_index: std::collections::BTreeMap::new(),
742 row_count: AtomicU64::new(0),
743 }
744 }
745
746 pub fn add_column(&mut self, name: impl Into<String>, column_type: ColumnType) -> &mut Self {
748 let name = name.into();
749 self.column_order.push(name.clone());
750 self.columns
751 .insert(name.clone(), ColumnChunk::new(name, column_type));
752 self
753 }
754
755 pub fn set_primary_key(&mut self, column: impl Into<String>) -> &mut Self {
757 self.primary_key = Some(column.into());
758 self
759 }
760
761 pub fn row_count(&self) -> u64 {
763 self.row_count.load(Ordering::Relaxed)
764 }
765
766 pub fn get_column(&self, name: &str) -> Option<&ColumnChunk> {
768 self.columns.get(name)
769 }
770
771 pub fn get_column_mut(&mut self, name: &str) -> Option<&mut ColumnChunk> {
773 self.columns.get_mut(name)
774 }
775
776 pub fn get_by_pk(&self, pk: i64) -> Option<u32> {
778 self.pk_index.get(&pk).copied()
779 }
780
781 pub fn insert_row(&mut self, values: &HashMap<String, ColumnValue>) -> u32 {
783 let row_idx = self.row_count.fetch_add(1, Ordering::Relaxed) as u32;
784
785 for col_name in &self.column_order {
786 let chunk = self.columns.get_mut(col_name).unwrap();
787 let value = values.get(col_name);
788
789 match &mut chunk.data {
790 TypedColumn::Int64 {
791 values,
792 validity,
793 stats,
794 } => {
795 match value {
796 Some(ColumnValue::Int64(v)) => {
797 values.push(*v);
798 validity.push(true);
799 stats.update_i64(*v);
800
801 if self.primary_key.as_ref() == Some(col_name) {
803 self.pk_index.insert(*v, row_idx);
804 }
805 }
806 _ => {
807 values.push(0);
808 validity.push(false);
809 stats.update_null();
810 }
811 }
812 }
813 TypedColumn::UInt64 {
814 values,
815 validity,
816 stats,
817 } => match value {
818 Some(ColumnValue::UInt64(v)) => {
819 values.push(*v);
820 validity.push(true);
821 stats.update_i64(*v as i64);
822 }
823 _ => {
824 values.push(0);
825 validity.push(false);
826 stats.update_null();
827 }
828 },
829 TypedColumn::Float64 {
830 values,
831 validity,
832 stats,
833 } => match value {
834 Some(ColumnValue::Float64(v)) => {
835 values.push(*v);
836 validity.push(true);
837 stats.update_f64(*v);
838 }
839 _ => {
840 values.push(0.0);
841 validity.push(false);
842 stats.update_null();
843 }
844 },
845 TypedColumn::Text {
846 offsets,
847 data,
848 validity,
849 stats,
850 } => match value {
851 Some(ColumnValue::Text(s)) => {
852 data.extend_from_slice(s.as_bytes());
853 offsets.push(data.len() as u32);
854 validity.push(true);
855 stats.row_count += 1;
856 }
857 _ => {
858 offsets.push(data.len() as u32);
859 validity.push(false);
860 stats.update_null();
861 }
862 },
863 TypedColumn::Binary {
864 offsets,
865 data,
866 validity,
867 stats,
868 } => match value {
869 Some(ColumnValue::Binary(b)) => {
870 data.extend_from_slice(b);
871 offsets.push(data.len() as u32);
872 validity.push(true);
873 stats.row_count += 1;
874 }
875 _ => {
876 offsets.push(data.len() as u32);
877 validity.push(false);
878 stats.update_null();
879 }
880 },
881 TypedColumn::Bool {
882 values,
883 validity,
884 stats,
885 len,
886 } => {
887 let idx = *len;
888 *len += 1;
889 let num_words = (*len).div_ceil(64);
890 while values.len() < num_words {
891 values.push(0);
892 }
893 match value {
894 Some(ColumnValue::Bool(v)) => {
895 if *v {
896 let word = idx / 64;
897 let bit = idx % 64;
898 values[word] |= 1 << bit;
899 }
900 validity.push(true);
901 stats.row_count += 1;
902 }
903 _ => {
904 validity.push(false);
905 stats.update_null();
906 }
907 }
908 }
909 }
910 }
911
912 row_idx
913 }
914
915 pub fn memory_size(&self) -> usize {
917 self.columns.values().map(|c| c.data.memory_size()).sum()
918 }
919
920 pub fn memory_comparison(&self) -> MemoryComparison {
922 let typed_size = self.memory_size();
923 let row_count = self.row_count() as usize;
924 let column_count = self.columns.len();
925
926 let enum_size = row_count * column_count * 32;
928
929 MemoryComparison {
930 typed_bytes: typed_size,
931 enum_bytes: enum_size,
932 savings_ratio: if typed_size > 0 {
933 enum_size as f64 / typed_size as f64
934 } else {
935 1.0
936 },
937 }
938 }
939}
940
/// Result of comparing a table's columnar footprint with a row-oriented
/// estimate.
#[derive(Debug, Clone)]
pub struct MemoryComparison {
    /// Bytes used by the typed columnar storage.
    pub typed_bytes: usize,
    /// Estimated bytes for the same data held as per-cell enum values.
    pub enum_bytes: usize,
    /// `enum_bytes` divided by `typed_bytes`.
    pub savings_ratio: f64,
}
948
/// A single cell value supplied when inserting a row.
#[derive(Debug, Clone)]
pub enum ColumnValue {
    /// No value.
    Null,
    /// Signed 64-bit integer.
    Int64(i64),
    /// Unsigned 64-bit integer.
    UInt64(u64),
    /// 64-bit float.
    Float64(f64),
    /// Owned UTF-8 string.
    Text(String),
    /// Owned byte string.
    Binary(Vec<u8>),
    /// Boolean.
    Bool(bool),
}
960
961#[derive(Debug, Default)]
963pub struct ColumnarStore {
964 tables: HashMap<String, ColumnarTable>,
966}
967
968impl ColumnarStore {
969 pub fn new() -> Self {
971 Self {
972 tables: HashMap::new(),
973 }
974 }
975
976 pub fn create_table(&mut self, name: impl Into<String>) -> &mut ColumnarTable {
978 let name = name.into();
979 self.tables
980 .entry(name.clone())
981 .or_insert_with(|| ColumnarTable::new(name))
982 }
983
984 pub fn get_table(&self, name: &str) -> Option<&ColumnarTable> {
986 self.tables.get(name)
987 }
988
989 pub fn get_table_mut(&mut self, name: &str) -> Option<&mut ColumnarTable> {
991 self.tables.get_mut(name)
992 }
993
994 pub fn drop_table(&mut self, name: &str) -> bool {
996 self.tables.remove(name).is_some()
997 }
998
999 pub fn memory_size(&self) -> usize {
1001 self.tables.values().map(|t| t.memory_size()).sum()
1002 }
1003}
1004
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a row map from `(column name, value)` pairs.
    fn row_of(cells: Vec<(&str, ColumnValue)>) -> HashMap<String, ColumnValue> {
        cells
            .into_iter()
            .map(|(column, value)| (column.to_string(), value))
            .collect()
    }

    /// Flipping one bit to null and back is reflected in the null count.
    #[test]
    fn test_validity_bitmap() {
        let mut flags = ValidityBitmap::new_all_valid(10);
        assert_eq!(flags.len(), 10);
        assert_eq!(flags.null_count(), 0);
        assert!(flags.is_valid(0));
        assert!(flags.is_valid(9));

        flags.set_null(5);
        assert_eq!(flags.null_count(), 1);
        assert!(!flags.is_valid(5));

        flags.set_valid(5);
        assert_eq!(flags.null_count(), 0);
        assert!(flags.is_valid(5));
    }

    /// Integer rows round-trip, nulls read back as `None`, and the sum skips
    /// them.
    #[test]
    fn test_int64_column() {
        let inputs = [Some(100), Some(200), None, Some(300)];
        let mut column = TypedColumn::new_int64();
        for cell in inputs.iter() {
            column.push_i64(*cell);
        }

        assert_eq!(column.len(), 4);
        assert_eq!(column.get_i64(0), Some(100));
        assert_eq!(column.get_i64(1), Some(200));
        assert_eq!(column.get_i64(2), None);
        assert_eq!(column.get_i64(3), Some(300));
        assert!(column.is_null(2));

        assert_eq!(column.sum_i64(), 600);
    }

    /// Text rows round-trip and nulls read back as `None`.
    #[test]
    fn test_text_column() {
        let inputs = [Some("hello"), Some("world"), None, Some("test")];
        let mut column = TypedColumn::new_text();
        for cell in inputs.iter() {
            column.push_text(*cell);
        }

        assert_eq!(column.len(), 4);
        assert_eq!(column.get_text(0), Some("hello"));
        assert_eq!(column.get_text(1), Some("world"));
        assert_eq!(column.get_text(2), None);
        assert_eq!(column.get_text(3), Some("test"));
    }

    /// Boolean rows round-trip and the packed storage stays small.
    #[test]
    fn test_bool_column() {
        let inputs = [Some(true), Some(false), None, Some(true)];
        let mut column = TypedColumn::new_bool();
        for cell in inputs.iter() {
            column.push_bool(*cell);
        }

        assert_eq!(column.len(), 4);
        assert_eq!(column.get_bool(0), Some(true));
        assert_eq!(column.get_bool(1), Some(false));
        assert_eq!(column.get_bool(2), None);
        assert_eq!(column.get_bool(3), Some(true));

        assert!(column.memory_size() < 32);
    }

    /// Inserted rows are counted, indexed by primary key, and readable.
    #[test]
    fn test_columnar_table() {
        let mut users = ColumnarTable::new("users");
        users.add_column("id", ColumnType::Int64);
        users.add_column("name", ColumnType::Text);
        users.add_column("active", ColumnType::Bool);
        users.set_primary_key("id");

        users.insert_row(&row_of(vec![
            ("id", ColumnValue::Int64(1)),
            ("name", ColumnValue::Text("Alice".to_string())),
            ("active", ColumnValue::Bool(true)),
        ]));
        users.insert_row(&row_of(vec![
            ("id", ColumnValue::Int64(2)),
            ("name", ColumnValue::Text("Bob".to_string())),
            ("active", ColumnValue::Bool(false)),
        ]));

        assert_eq!(users.row_count(), 2);
        assert_eq!(users.get_by_pk(1), Some(0));
        assert_eq!(users.get_by_pk(2), Some(1));
        assert_eq!(users.get_by_pk(3), None);

        let ids = users.get_column("id").unwrap();
        assert_eq!(ids.data.get_i64(0), Some(1));
        assert_eq!(ids.data.get_i64(1), Some(2));
    }

    /// The row-oriented estimate is more than three times the columnar size.
    #[test]
    fn test_memory_savings() {
        let mut table = ColumnarTable::new("test");
        table.add_column("id", ColumnType::Int64);
        table.add_column("value", ColumnType::Float64);
        table.add_column("flag", ColumnType::Bool);

        for i in 0..1000 {
            let row = row_of(vec![
                ("id", ColumnValue::Int64(i)),
                ("value", ColumnValue::Float64(i as f64 * 1.5)),
                ("flag", ColumnValue::Bool(i % 2 == 0)),
            ]);
            table.insert_row(&row);
        }

        let comparison = table.memory_comparison();

        assert!(
            comparison.savings_ratio > 3.0,
            "Expected 3x+ savings, got {:.2}x",
            comparison.savings_ratio
        );
    }

    /// Summing a column without nulls matches the plain range sum.
    #[test]
    fn test_simd_sum() {
        let mut column = TypedColumn::new_int64();
        (0..10000).for_each(|i| column.push_i64(Some(i)));

        let expected: i64 = (0..10000).sum();
        let sum = column.sum_i64();
        assert_eq!(sum, expected);
    }

    /// Tables can be created, looked up, and dropped.
    #[test]
    fn test_columnar_store() {
        let mut store = ColumnarStore::new();

        {
            let users = store.create_table("users");
            users.add_column("id", ColumnType::Int64);
            users.add_column("name", ColumnType::Text);
        }

        assert!(store.get_table("users").is_some());
        assert!(store.get_table("orders").is_none());

        store.drop_table("users");
        assert!(store.get_table("users").is_none());
    }

    /// Statistics track integer bounds, null count, and row count.
    #[test]
    fn test_column_stats() {
        let inputs = [Some(10), Some(50), None, Some(30), Some(20)];
        let mut column = TypedColumn::new_int64();
        for cell in inputs.iter() {
            column.push_i64(*cell);
        }

        let stats = column.stats();
        assert_eq!(stats.min_i64, Some(10));
        assert_eq!(stats.max_i64, Some(50));
        assert_eq!(stats.null_count, 1);
        assert_eq!(stats.row_count, 5);
    }
}