1use std::collections::HashMap;
38use std::sync::atomic::{AtomicU64, Ordering};
39
/// Bit-packed validity mask: one bit per row, where a set bit means the row
/// holds a value and a cleared bit means the row is null.
#[derive(Debug, Clone, Default)]
pub struct ValidityBitmap {
    /// Packed bits, 64 rows per word (row `i` lives in word `i / 64`, bit `i % 64`).
    bits: Vec<u64>,
    /// Number of rows currently marked null.
    null_count: usize,
    /// Number of rows tracked by the bitmap.
    len: usize,
}

impl ValidityBitmap {
    /// Creates a bitmap of `len` rows that are all valid.
    pub fn new_all_valid(len: usize) -> Self {
        // Padding bits past `len` are also set here; `push` always writes the
        // new row's bit explicitly, so stale padding never leaks into a row.
        Self {
            bits: vec![u64::MAX; len.div_ceil(64)],
            null_count: 0,
            len,
        }
    }

    /// Creates a bitmap of `len` rows that are all null.
    pub fn new_all_null(len: usize) -> Self {
        Self {
            bits: vec![0; len.div_ceil(64)],
            null_count: len,
            len,
        }
    }

    /// Returns `true` when row `idx` is valid; out-of-range rows are reported
    /// as not valid.
    #[inline]
    pub fn is_valid(&self, idx: usize) -> bool {
        if idx >= self.len {
            return false;
        }
        (self.bits[idx / 64] >> (idx % 64)) & 1 == 1
    }

    /// Marks row `idx` as valid, lowering the null count if it was null.
    /// Out-of-range indices are ignored.
    #[inline]
    pub fn set_valid(&mut self, idx: usize) {
        if idx >= self.len || self.is_valid(idx) {
            return;
        }
        self.bits[idx / 64] |= 1u64 << (idx % 64);
        self.null_count = self.null_count.saturating_sub(1);
    }

    /// Marks row `idx` as null, raising the null count if it was valid.
    /// Out-of-range indices are ignored.
    #[inline]
    pub fn set_null(&mut self, idx: usize) {
        if idx >= self.len || !self.is_valid(idx) {
            return;
        }
        self.bits[idx / 64] &= !(1u64 << (idx % 64));
        self.null_count = self.null_count.saturating_add(1);
    }

    /// Appends one row with the given validity.
    ///
    /// The new row's bit is written directly rather than through
    /// `set_valid`: a freshly appended row was never counted as null, so
    /// appending a valid row must not lower `null_count`, and appending a null
    /// row must clear the bit even if the padding in the last word was set.
    pub fn push(&mut self, valid: bool) {
        let idx = self.len;
        self.len += 1;

        let word = idx / 64;
        if word >= self.bits.len() {
            self.bits.resize(word + 1, 0);
        }

        let mask = 1u64 << (idx % 64);
        if valid {
            self.bits[word] |= mask;
        } else {
            self.bits[word] &= !mask;
            self.null_count += 1;
        }
    }

    /// Number of rows currently marked null.
    pub fn null_count(&self) -> usize {
        self.null_count
    }

    /// Number of rows tracked by the bitmap.
    pub fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` when the bitmap tracks no rows.
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }
}
141
/// Running summary statistics for a single column.
#[derive(Debug, Clone, Default)]
pub struct ColumnStats {
    /// Smallest value seen by `update_i64`, if any.
    pub min_i64: Option<i64>,
    /// Largest value seen by `update_i64`, if any.
    pub max_i64: Option<i64>,
    /// Smallest value seen by `update_f64`, if any.
    pub min_f64: Option<f64>,
    /// Largest value seen by `update_f64`, if any.
    pub max_f64: Option<f64>,
    /// Distinct-value counter; none of the update methods below touch it.
    pub distinct_count: u64,
    /// Number of nulls recorded through `update_null`.
    pub null_count: u64,
    /// Number of rows recorded (values and nulls).
    pub row_count: u64,
}

impl ColumnStats {
    /// Records one integer value: widens the integer min/max and counts the row.
    pub fn update_i64(&mut self, value: i64) {
        let lowest = match self.min_i64 {
            Some(current) => current.min(value),
            None => value,
        };
        let highest = match self.max_i64 {
            Some(current) => current.max(value),
            None => value,
        };
        self.min_i64 = Some(lowest);
        self.max_i64 = Some(highest);
        self.row_count += 1;
    }

    /// Records one float value: widens the float min/max and counts the row.
    pub fn update_f64(&mut self, value: f64) {
        let lowest = match self.min_f64 {
            Some(current) => current.min(value),
            None => value,
        };
        let highest = match self.max_f64 {
            Some(current) => current.max(value),
            None => value,
        };
        self.min_f64 = Some(lowest);
        self.max_f64 = Some(highest);
        self.row_count += 1;
    }

    /// Records one null row.
    pub fn update_null(&mut self) {
        self.row_count += 1;
        self.null_count += 1;
    }
}
179
180#[derive(Debug, Clone)]
182pub enum TypedColumn {
183 Int64 {
185 values: Vec<i64>,
186 validity: ValidityBitmap,
187 stats: ColumnStats,
188 },
189 UInt64 {
191 values: Vec<u64>,
192 validity: ValidityBitmap,
193 stats: ColumnStats,
194 },
195 Float64 {
197 values: Vec<f64>,
198 validity: ValidityBitmap,
199 stats: ColumnStats,
200 },
201 Text {
203 offsets: Vec<u32>,
205 data: Vec<u8>,
207 validity: ValidityBitmap,
208 stats: ColumnStats,
209 },
210 Binary {
212 offsets: Vec<u32>,
213 data: Vec<u8>,
214 validity: ValidityBitmap,
215 stats: ColumnStats,
216 },
217 Bool {
219 values: Vec<u64>,
221 validity: ValidityBitmap,
222 stats: ColumnStats,
223 len: usize,
224 },
225}
226
227impl TypedColumn {
228 pub fn new_int64() -> Self {
230 TypedColumn::Int64 {
231 values: Vec::new(),
232 validity: ValidityBitmap::default(),
233 stats: ColumnStats::default(),
234 }
235 }
236
237 pub fn new_uint64() -> Self {
239 TypedColumn::UInt64 {
240 values: Vec::new(),
241 validity: ValidityBitmap::default(),
242 stats: ColumnStats::default(),
243 }
244 }
245
246 pub fn new_float64() -> Self {
248 TypedColumn::Float64 {
249 values: Vec::new(),
250 validity: ValidityBitmap::default(),
251 stats: ColumnStats::default(),
252 }
253 }
254
255 pub fn new_text() -> Self {
257 TypedColumn::Text {
258 offsets: vec![0], data: Vec::new(),
260 validity: ValidityBitmap::default(),
261 stats: ColumnStats::default(),
262 }
263 }
264
265 pub fn new_binary() -> Self {
267 TypedColumn::Binary {
268 offsets: vec![0],
269 data: Vec::new(),
270 validity: ValidityBitmap::default(),
271 stats: ColumnStats::default(),
272 }
273 }
274
275 pub fn new_bool() -> Self {
277 TypedColumn::Bool {
278 values: Vec::new(),
279 validity: ValidityBitmap::default(),
280 stats: ColumnStats::default(),
281 len: 0,
282 }
283 }
284
285 pub fn len(&self) -> usize {
287 match self {
288 TypedColumn::Int64 { values, .. } => values.len(),
289 TypedColumn::UInt64 { values, .. } => values.len(),
290 TypedColumn::Float64 { values, .. } => values.len(),
291 TypedColumn::Text { offsets, .. } => offsets.len().saturating_sub(1),
292 TypedColumn::Binary { offsets, .. } => offsets.len().saturating_sub(1),
293 TypedColumn::Bool { len, .. } => *len,
294 }
295 }
296
297 pub fn is_empty(&self) -> bool {
299 self.len() == 0
300 }
301
302 pub fn push_i64(&mut self, value: Option<i64>) {
304 if let TypedColumn::Int64 {
305 values,
306 validity,
307 stats,
308 } = self
309 {
310 match value {
311 Some(v) => {
312 values.push(v);
313 validity.push(true);
314 stats.update_i64(v);
315 }
316 None => {
317 values.push(0); validity.push(false);
319 stats.update_null();
320 }
321 }
322 }
323 }
324
325 pub fn push_u64(&mut self, value: Option<u64>) {
327 if let TypedColumn::UInt64 {
328 values,
329 validity,
330 stats,
331 } = self
332 {
333 match value {
334 Some(v) => {
335 values.push(v);
336 validity.push(true);
337 stats.update_i64(v as i64);
338 }
339 None => {
340 values.push(0);
341 validity.push(false);
342 stats.update_null();
343 }
344 }
345 }
346 }
347
348 pub fn push_f64(&mut self, value: Option<f64>) {
350 if let TypedColumn::Float64 {
351 values,
352 validity,
353 stats,
354 } = self
355 {
356 match value {
357 Some(v) => {
358 values.push(v);
359 validity.push(true);
360 stats.update_f64(v);
361 }
362 None => {
363 values.push(0.0);
364 validity.push(false);
365 stats.update_null();
366 }
367 }
368 }
369 }
370
371 pub fn push_text(&mut self, value: Option<&str>) {
373 if let TypedColumn::Text {
374 offsets,
375 data,
376 validity,
377 stats,
378 } = self
379 {
380 match value {
381 Some(s) => {
382 data.extend_from_slice(s.as_bytes());
383 offsets.push(data.len() as u32);
384 validity.push(true);
385 stats.row_count += 1;
386 }
387 None => {
388 offsets.push(data.len() as u32);
389 validity.push(false);
390 stats.update_null();
391 }
392 }
393 }
394 }
395
396 pub fn push_binary(&mut self, value: Option<&[u8]>) {
398 if let TypedColumn::Binary {
399 offsets,
400 data,
401 validity,
402 stats,
403 } = self
404 {
405 match value {
406 Some(b) => {
407 data.extend_from_slice(b);
408 offsets.push(data.len() as u32);
409 validity.push(true);
410 stats.row_count += 1;
411 }
412 None => {
413 offsets.push(data.len() as u32);
414 validity.push(false);
415 stats.update_null();
416 }
417 }
418 }
419 }
420
421 pub fn push_bool(&mut self, value: Option<bool>) {
423 if let TypedColumn::Bool {
424 values,
425 validity,
426 stats,
427 len,
428 } = self
429 {
430 let idx = *len;
431 *len += 1;
432 let num_words = (*len).div_ceil(64);
433 while values.len() < num_words {
434 values.push(0);
435 }
436 match value {
437 Some(v) => {
438 if v {
439 let word = idx / 64;
440 let bit = idx % 64;
441 values[word] |= 1 << bit;
442 }
443 validity.push(true);
444 stats.row_count += 1;
445 }
446 None => {
447 validity.push(false);
448 stats.update_null();
449 }
450 }
451 }
452 }
453
454 pub fn get_i64(&self, idx: usize) -> Option<i64> {
456 if let TypedColumn::Int64 {
457 values, validity, ..
458 } = self
459 && idx < values.len()
460 && validity.is_valid(idx)
461 {
462 return Some(values[idx]);
463 }
464 None
465 }
466
467 pub fn get_u64(&self, idx: usize) -> Option<u64> {
469 if let TypedColumn::UInt64 {
470 values, validity, ..
471 } = self
472 && idx < values.len()
473 && validity.is_valid(idx)
474 {
475 return Some(values[idx]);
476 }
477 None
478 }
479
480 pub fn get_f64(&self, idx: usize) -> Option<f64> {
482 if let TypedColumn::Float64 {
483 values, validity, ..
484 } = self
485 && idx < values.len()
486 && validity.is_valid(idx)
487 {
488 return Some(values[idx]);
489 }
490 None
491 }
492
493 pub fn get_text(&self, idx: usize) -> Option<&str> {
495 if let TypedColumn::Text {
496 offsets,
497 data,
498 validity,
499 ..
500 } = self
501 && idx + 1 < offsets.len()
502 && validity.is_valid(idx)
503 {
504 let start = offsets[idx] as usize;
505 let end = offsets[idx + 1] as usize;
506 return std::str::from_utf8(&data[start..end]).ok();
507 }
508 None
509 }
510
511 pub fn get_binary(&self, idx: usize) -> Option<&[u8]> {
513 if let TypedColumn::Binary {
514 offsets,
515 data,
516 validity,
517 ..
518 } = self
519 && idx + 1 < offsets.len()
520 && validity.is_valid(idx)
521 {
522 let start = offsets[idx] as usize;
523 let end = offsets[idx + 1] as usize;
524 return Some(&data[start..end]);
525 }
526 None
527 }
528
529 pub fn get_bool(&self, idx: usize) -> Option<bool> {
531 if let TypedColumn::Bool {
532 values,
533 validity,
534 len,
535 ..
536 } = self
537 && idx < *len
538 && validity.is_valid(idx)
539 {
540 let word = idx / 64;
541 let bit = idx % 64;
542 return Some((values[word] >> bit) & 1 == 1);
543 }
544 None
545 }
546
547 pub fn is_null(&self, idx: usize) -> bool {
549 match self {
550 TypedColumn::Int64 { validity, .. } => !validity.is_valid(idx),
551 TypedColumn::UInt64 { validity, .. } => !validity.is_valid(idx),
552 TypedColumn::Float64 { validity, .. } => !validity.is_valid(idx),
553 TypedColumn::Text { validity, .. } => !validity.is_valid(idx),
554 TypedColumn::Binary { validity, .. } => !validity.is_valid(idx),
555 TypedColumn::Bool { validity, .. } => !validity.is_valid(idx),
556 }
557 }
558
559 pub fn stats(&self) -> &ColumnStats {
561 match self {
562 TypedColumn::Int64 { stats, .. } => stats,
563 TypedColumn::UInt64 { stats, .. } => stats,
564 TypedColumn::Float64 { stats, .. } => stats,
565 TypedColumn::Text { stats, .. } => stats,
566 TypedColumn::Binary { stats, .. } => stats,
567 TypedColumn::Bool { stats, .. } => stats,
568 }
569 }
570
571 #[inline]
573 pub fn sum_i64(&self) -> i64 {
574 if let TypedColumn::Int64 {
575 values, validity, ..
576 } = self
577 {
578 if validity.null_count() == 0 {
580 values.iter().sum()
581 } else {
582 values
584 .iter()
585 .enumerate()
586 .filter(|(i, _)| validity.is_valid(*i))
587 .map(|(_, v)| *v)
588 .sum()
589 }
590 } else {
591 0
592 }
593 }
594
595 #[inline]
597 pub fn sum_f64(&self) -> f64 {
598 if let TypedColumn::Float64 {
599 values, validity, ..
600 } = self
601 {
602 if validity.null_count() == 0 {
603 values.iter().sum()
604 } else {
605 values
606 .iter()
607 .enumerate()
608 .filter(|(i, _)| validity.is_valid(*i))
609 .map(|(_, v)| *v)
610 .sum()
611 }
612 } else {
613 0.0
614 }
615 }
616
617 pub fn memory_size(&self) -> usize {
619 match self {
620 TypedColumn::Int64 {
621 values, validity, ..
622 } => values.len() * 8 + validity.bits.len() * 8,
623 TypedColumn::UInt64 {
624 values, validity, ..
625 } => values.len() * 8 + validity.bits.len() * 8,
626 TypedColumn::Float64 {
627 values, validity, ..
628 } => values.len() * 8 + validity.bits.len() * 8,
629 TypedColumn::Text {
630 offsets,
631 data,
632 validity,
633 ..
634 } => offsets.len() * 4 + data.len() + validity.bits.len() * 8,
635 TypedColumn::Binary {
636 offsets,
637 data,
638 validity,
639 ..
640 } => offsets.len() * 4 + data.len() + validity.bits.len() * 8,
641 TypedColumn::Bool {
642 values, validity, ..
643 } => values.len() * 8 + validity.bits.len() * 8,
644 }
645 }
646}
647
648#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
650pub enum ColumnType {
651 Int64,
652 UInt64,
653 Float64,
654 Text,
655 Binary,
656 Bool,
657}
658
659impl ColumnType {
660 pub fn create_column(&self) -> TypedColumn {
662 match self {
663 ColumnType::Int64 => TypedColumn::new_int64(),
664 ColumnType::UInt64 => TypedColumn::new_uint64(),
665 ColumnType::Float64 => TypedColumn::new_float64(),
666 ColumnType::Text => TypedColumn::new_text(),
667 ColumnType::Binary => TypedColumn::new_binary(),
668 ColumnType::Bool => TypedColumn::new_bool(),
669 }
670 }
671}
672
673#[derive(Debug, Clone)]
675pub struct ColumnChunk {
676 pub name: String,
678 pub column_type: ColumnType,
680 pub data: TypedColumn,
682}
683
684impl ColumnChunk {
685 pub fn new(name: impl Into<String>, column_type: ColumnType) -> Self {
687 Self {
688 name: name.into(),
689 column_type,
690 data: column_type.create_column(),
691 }
692 }
693
694 pub fn stats(&self) -> &ColumnStats {
696 self.data.stats()
697 }
698}
699
700#[derive(Debug)]
702pub struct ColumnarTable {
703 pub name: String,
705 columns: HashMap<String, ColumnChunk>,
707 column_order: Vec<String>,
709 primary_key: Option<String>,
711 pk_index: std::collections::BTreeMap<i64, u32>,
713 row_count: AtomicU64,
715}
716
717impl Clone for ColumnarTable {
718 fn clone(&self) -> Self {
719 Self {
720 name: self.name.clone(),
721 columns: self.columns.clone(),
722 column_order: self.column_order.clone(),
723 primary_key: self.primary_key.clone(),
724 pk_index: self.pk_index.clone(),
725 row_count: AtomicU64::new(self.row_count.load(std::sync::atomic::Ordering::Relaxed)),
726 }
727 }
728}
729
730impl ColumnarTable {
731 pub fn new(name: impl Into<String>) -> Self {
733 Self {
734 name: name.into(),
735 columns: HashMap::new(),
736 column_order: Vec::new(),
737 primary_key: None,
738 pk_index: std::collections::BTreeMap::new(),
739 row_count: AtomicU64::new(0),
740 }
741 }
742
743 pub fn add_column(&mut self, name: impl Into<String>, column_type: ColumnType) -> &mut Self {
745 let name = name.into();
746 self.column_order.push(name.clone());
747 self.columns
748 .insert(name.clone(), ColumnChunk::new(name, column_type));
749 self
750 }
751
752 pub fn set_primary_key(&mut self, column: impl Into<String>) -> &mut Self {
754 self.primary_key = Some(column.into());
755 self
756 }
757
758 pub fn row_count(&self) -> u64 {
760 self.row_count.load(Ordering::Relaxed)
761 }
762
763 pub fn get_column(&self, name: &str) -> Option<&ColumnChunk> {
765 self.columns.get(name)
766 }
767
768 pub fn get_column_mut(&mut self, name: &str) -> Option<&mut ColumnChunk> {
770 self.columns.get_mut(name)
771 }
772
773 pub fn get_by_pk(&self, pk: i64) -> Option<u32> {
775 self.pk_index.get(&pk).copied()
776 }
777
778 pub fn insert_row(&mut self, values: &HashMap<String, ColumnValue>) -> u32 {
780 let row_idx = self.row_count.fetch_add(1, Ordering::Relaxed) as u32;
781
782 for col_name in &self.column_order {
783 let chunk = self.columns.get_mut(col_name).unwrap();
784 let value = values.get(col_name);
785
786 match &mut chunk.data {
787 TypedColumn::Int64 {
788 values,
789 validity,
790 stats,
791 } => {
792 match value {
793 Some(ColumnValue::Int64(v)) => {
794 values.push(*v);
795 validity.push(true);
796 stats.update_i64(*v);
797
798 if self.primary_key.as_ref() == Some(col_name) {
800 self.pk_index.insert(*v, row_idx);
801 }
802 }
803 _ => {
804 values.push(0);
805 validity.push(false);
806 stats.update_null();
807 }
808 }
809 }
810 TypedColumn::UInt64 {
811 values,
812 validity,
813 stats,
814 } => match value {
815 Some(ColumnValue::UInt64(v)) => {
816 values.push(*v);
817 validity.push(true);
818 stats.update_i64(*v as i64);
819 }
820 _ => {
821 values.push(0);
822 validity.push(false);
823 stats.update_null();
824 }
825 },
826 TypedColumn::Float64 {
827 values,
828 validity,
829 stats,
830 } => match value {
831 Some(ColumnValue::Float64(v)) => {
832 values.push(*v);
833 validity.push(true);
834 stats.update_f64(*v);
835 }
836 _ => {
837 values.push(0.0);
838 validity.push(false);
839 stats.update_null();
840 }
841 },
842 TypedColumn::Text {
843 offsets,
844 data,
845 validity,
846 stats,
847 } => match value {
848 Some(ColumnValue::Text(s)) => {
849 data.extend_from_slice(s.as_bytes());
850 offsets.push(data.len() as u32);
851 validity.push(true);
852 stats.row_count += 1;
853 }
854 _ => {
855 offsets.push(data.len() as u32);
856 validity.push(false);
857 stats.update_null();
858 }
859 },
860 TypedColumn::Binary {
861 offsets,
862 data,
863 validity,
864 stats,
865 } => match value {
866 Some(ColumnValue::Binary(b)) => {
867 data.extend_from_slice(b);
868 offsets.push(data.len() as u32);
869 validity.push(true);
870 stats.row_count += 1;
871 }
872 _ => {
873 offsets.push(data.len() as u32);
874 validity.push(false);
875 stats.update_null();
876 }
877 },
878 TypedColumn::Bool {
879 values,
880 validity,
881 stats,
882 len,
883 } => {
884 let idx = *len;
885 *len += 1;
886 let num_words = (*len).div_ceil(64);
887 while values.len() < num_words {
888 values.push(0);
889 }
890 match value {
891 Some(ColumnValue::Bool(v)) => {
892 if *v {
893 let word = idx / 64;
894 let bit = idx % 64;
895 values[word] |= 1 << bit;
896 }
897 validity.push(true);
898 stats.row_count += 1;
899 }
900 _ => {
901 validity.push(false);
902 stats.update_null();
903 }
904 }
905 }
906 }
907 }
908
909 row_idx
910 }
911
912 pub fn memory_size(&self) -> usize {
914 self.columns.values().map(|c| c.data.memory_size()).sum()
915 }
916
917 pub fn memory_comparison(&self) -> MemoryComparison {
919 let typed_size = self.memory_size();
920 let row_count = self.row_count() as usize;
921 let column_count = self.columns.len();
922
923 let enum_size = row_count * column_count * 32;
925
926 MemoryComparison {
927 typed_bytes: typed_size,
928 enum_bytes: enum_size,
929 savings_ratio: if typed_size > 0 {
930 enum_size as f64 / typed_size as f64
931 } else {
932 1.0
933 },
934 }
935 }
936}
937
/// Size of the typed columnar layout next to an enum-per-cell estimate.
#[derive(Debug, Clone)]
pub struct MemoryComparison {
    /// Bytes used by the typed columnar layout.
    pub typed_bytes: usize,
    /// Estimated bytes for the enum-per-cell layout.
    pub enum_bytes: usize,
    /// Ratio of the enum estimate to the typed size.
    pub savings_ratio: f64,
}
945
/// A single dynamically typed cell value, as supplied when inserting a row.
#[derive(Debug, Clone)]
pub enum ColumnValue {
    /// Explicit null.
    Null,
    /// Signed 64-bit integer.
    Int64(i64),
    /// Unsigned 64-bit integer.
    UInt64(u64),
    /// 64-bit float.
    Float64(f64),
    /// Owned string.
    Text(String),
    /// Owned byte string.
    Binary(Vec<u8>),
    /// Boolean.
    Bool(bool),
}
957
958#[derive(Debug, Default)]
960pub struct ColumnarStore {
961 tables: HashMap<String, ColumnarTable>,
963}
964
965impl ColumnarStore {
966 pub fn new() -> Self {
968 Self {
969 tables: HashMap::new(),
970 }
971 }
972
973 pub fn create_table(&mut self, name: impl Into<String>) -> &mut ColumnarTable {
975 let name = name.into();
976 self.tables
977 .entry(name.clone())
978 .or_insert_with(|| ColumnarTable::new(name))
979 }
980
981 pub fn get_table(&self, name: &str) -> Option<&ColumnarTable> {
983 self.tables.get(name)
984 }
985
986 pub fn get_table_mut(&mut self, name: &str) -> Option<&mut ColumnarTable> {
988 self.tables.get_mut(name)
989 }
990
991 pub fn drop_table(&mut self, name: &str) -> bool {
993 self.tables.remove(name).is_some()
994 }
995
996 pub fn memory_size(&self) -> usize {
998 self.tables.values().map(|t| t.memory_size()).sum()
999 }
1000}
1001
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a row map from `(column, value)` pairs.
    fn row_of(cells: Vec<(&str, ColumnValue)>) -> HashMap<String, ColumnValue> {
        let mut row = HashMap::new();
        for (column, value) in cells {
            row.insert(column.to_string(), value);
        }
        row
    }

    /// Flipping a bit to null and back keeps the null count in step.
    #[test]
    fn test_validity_bitmap() {
        let mut mask = ValidityBitmap::new_all_valid(10);
        assert_eq!(mask.len(), 10);
        assert_eq!(mask.null_count(), 0);
        for &idx in &[0usize, 9] {
            assert!(mask.is_valid(idx));
        }

        mask.set_null(5);
        assert_eq!(mask.null_count(), 1);
        assert!(!mask.is_valid(5));

        mask.set_valid(5);
        assert_eq!(mask.null_count(), 0);
        assert!(mask.is_valid(5));
    }

    /// Values and nulls round-trip through an Int64 column; sum skips nulls.
    #[test]
    fn test_int64_column() {
        let inputs = [Some(100), Some(200), None, Some(300)];
        let mut col = TypedColumn::new_int64();
        for value in inputs.iter() {
            col.push_i64(*value);
        }

        assert_eq!(col.len(), 4);
        for (idx, expected) in inputs.iter().enumerate() {
            assert_eq!(col.get_i64(idx), *expected);
        }
        assert!(col.is_null(2));

        assert_eq!(col.sum_i64(), 600);
    }

    /// Strings and nulls round-trip through a Text column.
    #[test]
    fn test_text_column() {
        let inputs = [Some("hello"), Some("world"), None, Some("test")];
        let mut col = TypedColumn::new_text();
        for value in inputs.iter() {
            col.push_text(*value);
        }

        assert_eq!(col.len(), 4);
        for (idx, expected) in inputs.iter().enumerate() {
            assert_eq!(col.get_text(idx), *expected);
        }
    }

    /// Booleans and nulls round-trip through a Bool column, which stays compact.
    #[test]
    fn test_bool_column() {
        let inputs = [Some(true), Some(false), None, Some(true)];
        let mut col = TypedColumn::new_bool();
        for value in inputs.iter() {
            col.push_bool(*value);
        }

        assert_eq!(col.len(), 4);
        for (idx, expected) in inputs.iter().enumerate() {
            assert_eq!(col.get_bool(idx), *expected);
        }

        assert!(col.memory_size() < 32);
    }

    /// Inserted rows are counted, indexed by primary key, and readable.
    #[test]
    fn test_columnar_table() {
        let mut table = ColumnarTable::new("users");
        table
            .add_column("id", ColumnType::Int64)
            .add_column("name", ColumnType::Text)
            .add_column("active", ColumnType::Bool)
            .set_primary_key("id");

        table.insert_row(&row_of(vec![
            ("id", ColumnValue::Int64(1)),
            ("name", ColumnValue::Text("Alice".to_string())),
            ("active", ColumnValue::Bool(true)),
        ]));
        table.insert_row(&row_of(vec![
            ("id", ColumnValue::Int64(2)),
            ("name", ColumnValue::Text("Bob".to_string())),
            ("active", ColumnValue::Bool(false)),
        ]));

        assert_eq!(table.row_count(), 2);
        assert_eq!(table.get_by_pk(1), Some(0));
        assert_eq!(table.get_by_pk(2), Some(1));
        assert_eq!(table.get_by_pk(3), None);

        let ids = &table.get_column("id").unwrap().data;
        assert_eq!(ids.get_i64(0), Some(1));
        assert_eq!(ids.get_i64(1), Some(2));
    }

    /// The typed layout beats the 32-bytes-per-cell estimate by more than 3x.
    #[test]
    fn test_memory_savings() {
        let mut table = ColumnarTable::new("test");
        table
            .add_column("id", ColumnType::Int64)
            .add_column("value", ColumnType::Float64)
            .add_column("flag", ColumnType::Bool);

        for i in 0..1000 {
            table.insert_row(&row_of(vec![
                ("id", ColumnValue::Int64(i)),
                ("value", ColumnValue::Float64(i as f64 * 1.5)),
                ("flag", ColumnValue::Bool(i % 2 == 0)),
            ]));
        }

        let comparison = table.memory_comparison();

        assert!(
            comparison.savings_ratio > 3.0,
            "Expected 3x+ savings, got {:.2}x",
            comparison.savings_ratio
        );
    }

    /// Summing a dense column matches the arithmetic sum of its inputs.
    #[test]
    fn test_simd_sum() {
        let mut col = TypedColumn::new_int64();
        (0..10000).for_each(|i| col.push_i64(Some(i)));

        let expected: i64 = (0..10000).sum();
        assert_eq!(col.sum_i64(), expected);
    }

    /// Tables can be created, looked up, and dropped.
    #[test]
    fn test_columnar_store() {
        let mut store = ColumnarStore::new();

        store
            .create_table("users")
            .add_column("id", ColumnType::Int64)
            .add_column("name", ColumnType::Text);

        assert!(store.get_table("users").is_some());
        assert!(store.get_table("orders").is_none());

        store.drop_table("users");
        assert!(store.get_table("users").is_none());
    }

    /// Min, max, null count, and row count follow the pushed values.
    #[test]
    fn test_column_stats() {
        let mut col = TypedColumn::new_int64();
        for value in [Some(10), Some(50), None, Some(30), Some(20)].iter() {
            col.push_i64(*value);
        }

        let stats = col.stats();
        assert_eq!(stats.min_i64, Some(10));
        assert_eq!(stats.max_i64, Some(50));
        assert_eq!(stats.null_count, 1);
        assert_eq!(stats.row_count, 5);
    }
}