1use std::collections::HashMap;
41use std::sync::atomic::{AtomicU64, Ordering};
42
/// Bit-packed validity mask for a column: bit `i` is set when row `i` holds a
/// value and cleared when row `i` is null.
#[derive(Debug, Clone, Default)]
pub struct ValidityBitmap {
    /// Packed validity bits, 64 rows per word, least-significant bit first.
    bits: Vec<u64>,
    /// Number of rows currently marked null.
    null_count: usize,
    /// Number of rows tracked by the bitmap.
    len: usize,
}

impl ValidityBitmap {
    /// Creates a bitmap of `len` rows where every row is valid.
    pub fn new_all_valid(len: usize) -> Self {
        let num_words = len.div_ceil(64);
        Self {
            bits: vec![u64::MAX; num_words],
            null_count: 0,
            len,
        }
    }

    /// Creates a bitmap of `len` rows where every row is null.
    pub fn new_all_null(len: usize) -> Self {
        let num_words = len.div_ceil(64);
        Self {
            bits: vec![0; num_words],
            null_count: len,
            len,
        }
    }

    /// Returns `true` when row `idx` is in range and marked valid.
    ///
    /// Out-of-range indices are reported as not valid.
    #[inline]
    pub fn is_valid(&self, idx: usize) -> bool {
        if idx >= self.len {
            return false;
        }
        (self.bits[idx / 64] >> (idx % 64)) & 1 == 1
    }

    /// Marks row `idx` as valid; a no-op when out of range or already valid.
    #[inline]
    pub fn set_valid(&mut self, idx: usize) {
        if idx >= self.len || self.is_valid(idx) {
            return;
        }
        self.bits[idx / 64] |= 1u64 << (idx % 64);
        self.null_count = self.null_count.saturating_sub(1);
    }

    /// Marks row `idx` as null; a no-op when out of range or already null.
    #[inline]
    pub fn set_null(&mut self, idx: usize) {
        // `is_valid` is false for out-of-range rows, so this guard covers both cases.
        if !self.is_valid(idx) {
            return;
        }
        self.bits[idx / 64] &= !(1u64 << (idx % 64));
        self.null_count = self.null_count.saturating_add(1);
    }

    /// Appends one row with the given validity.
    ///
    /// The new row's bit is written explicitly: padding bits left set by
    /// `new_all_valid` must not leak into a pushed null row, and appending a
    /// valid row must not change the null count of the existing rows.
    pub fn push(&mut self, valid: bool) {
        let idx = self.len;
        self.len += 1;

        let word = idx / 64;
        if word >= self.bits.len() {
            self.bits.resize(word + 1, 0);
        }

        let mask = 1u64 << (idx % 64);
        if valid {
            self.bits[word] |= mask;
        } else {
            self.bits[word] &= !mask;
            self.null_count += 1;
        }
    }

    /// Number of rows currently marked null.
    pub fn null_count(&self) -> usize {
        self.null_count
    }

    /// Number of rows tracked by the bitmap.
    pub fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` when the bitmap tracks no rows.
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }
}
144
/// Running per-column statistics maintained while rows are appended.
#[derive(Debug, Clone, Default)]
pub struct ColumnStats {
    /// Smallest value seen through `update_i64`, if any.
    pub min_i64: Option<i64>,
    /// Largest value seen through `update_i64`, if any.
    pub max_i64: Option<i64>,
    /// Smallest value seen through `update_f64`, if any.
    pub min_f64: Option<f64>,
    /// Largest value seen through `update_f64`, if any.
    pub max_f64: Option<f64>,
    /// Distinct-value counter; none of the update methods here modify it.
    pub distinct_count: u64,
    /// Number of null rows recorded through `update_null`.
    pub null_count: u64,
    /// Total number of rows recorded (null and non-null).
    pub row_count: u64,
}

impl ColumnStats {
    /// Records one non-null integer row, widening the `i64` min/max bounds.
    pub fn update_i64(&mut self, value: i64) {
        self.min_i64 = match self.min_i64 {
            Some(current) => Some(current.min(value)),
            None => Some(value),
        };
        self.max_i64 = match self.max_i64 {
            Some(current) => Some(current.max(value)),
            None => Some(value),
        };
        self.row_count += 1;
    }

    /// Records one non-null float row, widening the `f64` min/max bounds.
    pub fn update_f64(&mut self, value: f64) {
        self.min_f64 = match self.min_f64 {
            Some(current) => Some(current.min(value)),
            None => Some(value),
        };
        self.max_f64 = match self.max_f64 {
            Some(current) => Some(current.max(value)),
            None => Some(value),
        };
        self.row_count += 1;
    }

    /// Records one null row.
    pub fn update_null(&mut self) {
        self.row_count += 1;
        self.null_count += 1;
    }
}
182
/// A single column stored in a type-specific layout, together with its
/// validity bitmap and running statistics.
#[derive(Debug, Clone)]
pub enum TypedColumn {
    /// Signed 64-bit integers; null rows hold a `0` placeholder in `values`.
    Int64 {
        values: Vec<i64>,
        validity: ValidityBitmap,
        stats: ColumnStats,
    },
    /// Unsigned 64-bit integers; null rows hold a `0` placeholder in `values`.
    UInt64 {
        values: Vec<u64>,
        validity: ValidityBitmap,
        stats: ColumnStats,
    },
    /// 64-bit floats; null rows hold a `0.0` placeholder in `values`.
    Float64 {
        values: Vec<f64>,
        validity: ValidityBitmap,
        stats: ColumnStats,
    },
    /// Text rows stored back to back in `data`.
    Text {
        // Row `i` occupies `data[offsets[i]..offsets[i + 1]]`; the constructor
        // seeds `offsets` with a single `0`.
        offsets: Vec<u32>,
        // Concatenated bytes of all rows.
        data: Vec<u8>,
        validity: ValidityBitmap,
        stats: ColumnStats,
    },
    /// Binary rows, using the same offsets/data layout as `Text`.
    Binary {
        offsets: Vec<u32>,
        data: Vec<u8>,
        validity: ValidityBitmap,
        stats: ColumnStats,
    },
    /// Booleans packed one bit per row into 64-bit words.
    Bool {
        // Bit-packed values, 64 rows per word, least-significant bit first.
        values: Vec<u64>,
        validity: ValidityBitmap,
        stats: ColumnStats,
        // Number of rows; kept separately because `values` is bit-packed.
        len: usize,
    },
}
229
230impl TypedColumn {
231 pub fn new_int64() -> Self {
233 TypedColumn::Int64 {
234 values: Vec::new(),
235 validity: ValidityBitmap::default(),
236 stats: ColumnStats::default(),
237 }
238 }
239
240 pub fn new_uint64() -> Self {
242 TypedColumn::UInt64 {
243 values: Vec::new(),
244 validity: ValidityBitmap::default(),
245 stats: ColumnStats::default(),
246 }
247 }
248
249 pub fn new_float64() -> Self {
251 TypedColumn::Float64 {
252 values: Vec::new(),
253 validity: ValidityBitmap::default(),
254 stats: ColumnStats::default(),
255 }
256 }
257
258 pub fn new_text() -> Self {
260 TypedColumn::Text {
261 offsets: vec![0], data: Vec::new(),
263 validity: ValidityBitmap::default(),
264 stats: ColumnStats::default(),
265 }
266 }
267
268 pub fn new_binary() -> Self {
270 TypedColumn::Binary {
271 offsets: vec![0],
272 data: Vec::new(),
273 validity: ValidityBitmap::default(),
274 stats: ColumnStats::default(),
275 }
276 }
277
278 pub fn new_bool() -> Self {
280 TypedColumn::Bool {
281 values: Vec::new(),
282 validity: ValidityBitmap::default(),
283 stats: ColumnStats::default(),
284 len: 0,
285 }
286 }
287
288 pub fn len(&self) -> usize {
290 match self {
291 TypedColumn::Int64 { values, .. } => values.len(),
292 TypedColumn::UInt64 { values, .. } => values.len(),
293 TypedColumn::Float64 { values, .. } => values.len(),
294 TypedColumn::Text { offsets, .. } => offsets.len().saturating_sub(1),
295 TypedColumn::Binary { offsets, .. } => offsets.len().saturating_sub(1),
296 TypedColumn::Bool { len, .. } => *len,
297 }
298 }
299
300 pub fn is_empty(&self) -> bool {
302 self.len() == 0
303 }
304
305 pub fn push_i64(&mut self, value: Option<i64>) {
307 if let TypedColumn::Int64 {
308 values,
309 validity,
310 stats,
311 } = self
312 {
313 match value {
314 Some(v) => {
315 values.push(v);
316 validity.push(true);
317 stats.update_i64(v);
318 }
319 None => {
320 values.push(0); validity.push(false);
322 stats.update_null();
323 }
324 }
325 }
326 }
327
328 pub fn push_u64(&mut self, value: Option<u64>) {
330 if let TypedColumn::UInt64 {
331 values,
332 validity,
333 stats,
334 } = self
335 {
336 match value {
337 Some(v) => {
338 values.push(v);
339 validity.push(true);
340 stats.update_i64(v as i64);
341 }
342 None => {
343 values.push(0);
344 validity.push(false);
345 stats.update_null();
346 }
347 }
348 }
349 }
350
351 pub fn push_f64(&mut self, value: Option<f64>) {
353 if let TypedColumn::Float64 {
354 values,
355 validity,
356 stats,
357 } = self
358 {
359 match value {
360 Some(v) => {
361 values.push(v);
362 validity.push(true);
363 stats.update_f64(v);
364 }
365 None => {
366 values.push(0.0);
367 validity.push(false);
368 stats.update_null();
369 }
370 }
371 }
372 }
373
374 pub fn push_text(&mut self, value: Option<&str>) {
376 if let TypedColumn::Text {
377 offsets,
378 data,
379 validity,
380 stats,
381 } = self
382 {
383 match value {
384 Some(s) => {
385 data.extend_from_slice(s.as_bytes());
386 offsets.push(data.len() as u32);
387 validity.push(true);
388 stats.row_count += 1;
389 }
390 None => {
391 offsets.push(data.len() as u32);
392 validity.push(false);
393 stats.update_null();
394 }
395 }
396 }
397 }
398
399 pub fn push_binary(&mut self, value: Option<&[u8]>) {
401 if let TypedColumn::Binary {
402 offsets,
403 data,
404 validity,
405 stats,
406 } = self
407 {
408 match value {
409 Some(b) => {
410 data.extend_from_slice(b);
411 offsets.push(data.len() as u32);
412 validity.push(true);
413 stats.row_count += 1;
414 }
415 None => {
416 offsets.push(data.len() as u32);
417 validity.push(false);
418 stats.update_null();
419 }
420 }
421 }
422 }
423
424 pub fn push_bool(&mut self, value: Option<bool>) {
426 if let TypedColumn::Bool {
427 values,
428 validity,
429 stats,
430 len,
431 } = self
432 {
433 let idx = *len;
434 *len += 1;
435 let num_words = (*len).div_ceil(64);
436 while values.len() < num_words {
437 values.push(0);
438 }
439 match value {
440 Some(v) => {
441 if v {
442 let word = idx / 64;
443 let bit = idx % 64;
444 values[word] |= 1 << bit;
445 }
446 validity.push(true);
447 stats.row_count += 1;
448 }
449 None => {
450 validity.push(false);
451 stats.update_null();
452 }
453 }
454 }
455 }
456
457 pub fn get_i64(&self, idx: usize) -> Option<i64> {
459 if let TypedColumn::Int64 {
460 values, validity, ..
461 } = self
462 && idx < values.len()
463 && validity.is_valid(idx)
464 {
465 return Some(values[idx]);
466 }
467 None
468 }
469
470 pub fn get_u64(&self, idx: usize) -> Option<u64> {
472 if let TypedColumn::UInt64 {
473 values, validity, ..
474 } = self
475 && idx < values.len()
476 && validity.is_valid(idx)
477 {
478 return Some(values[idx]);
479 }
480 None
481 }
482
483 pub fn get_f64(&self, idx: usize) -> Option<f64> {
485 if let TypedColumn::Float64 {
486 values, validity, ..
487 } = self
488 && idx < values.len()
489 && validity.is_valid(idx)
490 {
491 return Some(values[idx]);
492 }
493 None
494 }
495
496 pub fn get_text(&self, idx: usize) -> Option<&str> {
498 if let TypedColumn::Text {
499 offsets,
500 data,
501 validity,
502 ..
503 } = self
504 && idx + 1 < offsets.len()
505 && validity.is_valid(idx)
506 {
507 let start = offsets[idx] as usize;
508 let end = offsets[idx + 1] as usize;
509 return std::str::from_utf8(&data[start..end]).ok();
510 }
511 None
512 }
513
514 pub fn get_binary(&self, idx: usize) -> Option<&[u8]> {
516 if let TypedColumn::Binary {
517 offsets,
518 data,
519 validity,
520 ..
521 } = self
522 && idx + 1 < offsets.len()
523 && validity.is_valid(idx)
524 {
525 let start = offsets[idx] as usize;
526 let end = offsets[idx + 1] as usize;
527 return Some(&data[start..end]);
528 }
529 None
530 }
531
532 pub fn get_bool(&self, idx: usize) -> Option<bool> {
534 if let TypedColumn::Bool {
535 values,
536 validity,
537 len,
538 ..
539 } = self
540 && idx < *len
541 && validity.is_valid(idx)
542 {
543 let word = idx / 64;
544 let bit = idx % 64;
545 return Some((values[word] >> bit) & 1 == 1);
546 }
547 None
548 }
549
550 pub fn is_null(&self, idx: usize) -> bool {
552 match self {
553 TypedColumn::Int64 { validity, .. } => !validity.is_valid(idx),
554 TypedColumn::UInt64 { validity, .. } => !validity.is_valid(idx),
555 TypedColumn::Float64 { validity, .. } => !validity.is_valid(idx),
556 TypedColumn::Text { validity, .. } => !validity.is_valid(idx),
557 TypedColumn::Binary { validity, .. } => !validity.is_valid(idx),
558 TypedColumn::Bool { validity, .. } => !validity.is_valid(idx),
559 }
560 }
561
562 pub fn stats(&self) -> &ColumnStats {
564 match self {
565 TypedColumn::Int64 { stats, .. } => stats,
566 TypedColumn::UInt64 { stats, .. } => stats,
567 TypedColumn::Float64 { stats, .. } => stats,
568 TypedColumn::Text { stats, .. } => stats,
569 TypedColumn::Binary { stats, .. } => stats,
570 TypedColumn::Bool { stats, .. } => stats,
571 }
572 }
573
574 #[inline]
576 pub fn sum_i64(&self) -> i64 {
577 if let TypedColumn::Int64 {
578 values, validity, ..
579 } = self
580 {
581 if validity.null_count() == 0 {
583 values.iter().sum()
584 } else {
585 values
587 .iter()
588 .enumerate()
589 .filter(|(i, _)| validity.is_valid(*i))
590 .map(|(_, v)| *v)
591 .sum()
592 }
593 } else {
594 0
595 }
596 }
597
598 #[inline]
600 pub fn sum_f64(&self) -> f64 {
601 if let TypedColumn::Float64 {
602 values, validity, ..
603 } = self
604 {
605 if validity.null_count() == 0 {
606 values.iter().sum()
607 } else {
608 values
609 .iter()
610 .enumerate()
611 .filter(|(i, _)| validity.is_valid(*i))
612 .map(|(_, v)| *v)
613 .sum()
614 }
615 } else {
616 0.0
617 }
618 }
619
620 pub fn memory_size(&self) -> usize {
622 match self {
623 TypedColumn::Int64 {
624 values, validity, ..
625 } => values.len() * 8 + validity.bits.len() * 8,
626 TypedColumn::UInt64 {
627 values, validity, ..
628 } => values.len() * 8 + validity.bits.len() * 8,
629 TypedColumn::Float64 {
630 values, validity, ..
631 } => values.len() * 8 + validity.bits.len() * 8,
632 TypedColumn::Text {
633 offsets,
634 data,
635 validity,
636 ..
637 } => offsets.len() * 4 + data.len() + validity.bits.len() * 8,
638 TypedColumn::Binary {
639 offsets,
640 data,
641 validity,
642 ..
643 } => offsets.len() * 4 + data.len() + validity.bits.len() * 8,
644 TypedColumn::Bool {
645 values, validity, ..
646 } => values.len() * 8 + validity.bits.len() * 8,
647 }
648 }
649
650 pub fn value_at(&self, idx: usize) -> crate::SochValue {
656 use crate::SochValue;
657 match self {
658 TypedColumn::Int64 { values, validity, .. } => {
659 if idx < values.len() && validity.is_valid(idx) {
660 SochValue::Int(values[idx])
661 } else {
662 SochValue::Null
663 }
664 }
665 TypedColumn::UInt64 { values, validity, .. } => {
666 if idx < values.len() && validity.is_valid(idx) {
667 SochValue::UInt(values[idx])
668 } else {
669 SochValue::Null
670 }
671 }
672 TypedColumn::Float64 { values, validity, .. } => {
673 if idx < values.len() && validity.is_valid(idx) {
674 SochValue::Float(values[idx])
675 } else {
676 SochValue::Null
677 }
678 }
679 TypedColumn::Text { offsets, data, validity, .. } => {
680 if idx + 1 < offsets.len() && validity.is_valid(idx) {
681 let start = offsets[idx] as usize;
682 let end = offsets[idx + 1] as usize;
683 std::str::from_utf8(&data[start..end])
684 .map(|s| SochValue::Text(s.to_owned()))
685 .unwrap_or(SochValue::Null)
686 } else {
687 SochValue::Null
688 }
689 }
690 TypedColumn::Binary { offsets, data, validity, .. } => {
691 if idx + 1 < offsets.len() && validity.is_valid(idx) {
692 let start = offsets[idx] as usize;
693 let end = offsets[idx + 1] as usize;
694 SochValue::Binary(data[start..end].to_vec())
695 } else {
696 SochValue::Null
697 }
698 }
699 TypedColumn::Bool { values, validity, len, .. } => {
700 if idx < *len && validity.is_valid(idx) {
701 let word = idx / 64;
702 let bit = idx % 64;
703 SochValue::Bool((values[word] >> bit) & 1 == 1)
704 } else {
705 SochValue::Null
706 }
707 }
708 }
709 }
710}
711
712#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
714pub enum ColumnType {
715 Int64,
716 UInt64,
717 Float64,
718 Text,
719 Binary,
720 Bool,
721}
722
723impl ColumnType {
724 pub fn create_column(&self) -> TypedColumn {
726 match self {
727 ColumnType::Int64 => TypedColumn::new_int64(),
728 ColumnType::UInt64 => TypedColumn::new_uint64(),
729 ColumnType::Float64 => TypedColumn::new_float64(),
730 ColumnType::Text => TypedColumn::new_text(),
731 ColumnType::Binary => TypedColumn::new_binary(),
732 ColumnType::Bool => TypedColumn::new_bool(),
733 }
734 }
735}
736
737#[derive(Debug, Clone)]
739pub struct ColumnChunk {
740 pub name: String,
742 pub column_type: ColumnType,
744 pub data: TypedColumn,
746}
747
748impl ColumnChunk {
749 pub fn new(name: impl Into<String>, column_type: ColumnType) -> Self {
751 Self {
752 name: name.into(),
753 column_type,
754 data: column_type.create_column(),
755 }
756 }
757
758 pub fn stats(&self) -> &ColumnStats {
760 self.data.stats()
761 }
762}
763
/// A table whose rows are stored column by column.
#[derive(Debug)]
pub struct ColumnarTable {
    /// Table name.
    pub name: String,
    /// Column storage keyed by column name.
    columns: HashMap<String, ColumnChunk>,
    /// Column names in the order they were added; `insert_row` iterates this.
    column_order: Vec<String>,
    /// Name of the primary-key column, if one was set.
    primary_key: Option<String>,
    /// Maps a primary-key value to the row index it was inserted at.
    pk_index: std::collections::BTreeMap<i64, u32>,
    /// Number of rows inserted so far.
    row_count: AtomicU64,
}
780
781impl Clone for ColumnarTable {
782 fn clone(&self) -> Self {
783 Self {
784 name: self.name.clone(),
785 columns: self.columns.clone(),
786 column_order: self.column_order.clone(),
787 primary_key: self.primary_key.clone(),
788 pk_index: self.pk_index.clone(),
789 row_count: AtomicU64::new(self.row_count.load(std::sync::atomic::Ordering::Relaxed)),
790 }
791 }
792}
793
794impl ColumnarTable {
795 pub fn new(name: impl Into<String>) -> Self {
797 Self {
798 name: name.into(),
799 columns: HashMap::new(),
800 column_order: Vec::new(),
801 primary_key: None,
802 pk_index: std::collections::BTreeMap::new(),
803 row_count: AtomicU64::new(0),
804 }
805 }
806
807 pub fn add_column(&mut self, name: impl Into<String>, column_type: ColumnType) -> &mut Self {
809 let name = name.into();
810 self.column_order.push(name.clone());
811 self.columns
812 .insert(name.clone(), ColumnChunk::new(name, column_type));
813 self
814 }
815
816 pub fn set_primary_key(&mut self, column: impl Into<String>) -> &mut Self {
818 self.primary_key = Some(column.into());
819 self
820 }
821
822 pub fn row_count(&self) -> u64 {
824 self.row_count.load(Ordering::Relaxed)
825 }
826
827 pub fn get_column(&self, name: &str) -> Option<&ColumnChunk> {
829 self.columns.get(name)
830 }
831
832 pub fn get_column_mut(&mut self, name: &str) -> Option<&mut ColumnChunk> {
834 self.columns.get_mut(name)
835 }
836
837 pub fn get_by_pk(&self, pk: i64) -> Option<u32> {
839 self.pk_index.get(&pk).copied()
840 }
841
842 pub fn insert_row(&mut self, values: &HashMap<String, ColumnValue>) -> u32 {
844 let row_idx = self.row_count.fetch_add(1, Ordering::Relaxed) as u32;
845
846 for col_name in &self.column_order {
847 let chunk = self.columns.get_mut(col_name).unwrap();
848 let value = values.get(col_name);
849
850 match &mut chunk.data {
851 TypedColumn::Int64 {
852 values,
853 validity,
854 stats,
855 } => {
856 match value {
857 Some(ColumnValue::Int64(v)) => {
858 values.push(*v);
859 validity.push(true);
860 stats.update_i64(*v);
861
862 if self.primary_key.as_ref() == Some(col_name) {
864 self.pk_index.insert(*v, row_idx);
865 }
866 }
867 _ => {
868 values.push(0);
869 validity.push(false);
870 stats.update_null();
871 }
872 }
873 }
874 TypedColumn::UInt64 {
875 values,
876 validity,
877 stats,
878 } => match value {
879 Some(ColumnValue::UInt64(v)) => {
880 values.push(*v);
881 validity.push(true);
882 stats.update_i64(*v as i64);
883 }
884 _ => {
885 values.push(0);
886 validity.push(false);
887 stats.update_null();
888 }
889 },
890 TypedColumn::Float64 {
891 values,
892 validity,
893 stats,
894 } => match value {
895 Some(ColumnValue::Float64(v)) => {
896 values.push(*v);
897 validity.push(true);
898 stats.update_f64(*v);
899 }
900 _ => {
901 values.push(0.0);
902 validity.push(false);
903 stats.update_null();
904 }
905 },
906 TypedColumn::Text {
907 offsets,
908 data,
909 validity,
910 stats,
911 } => match value {
912 Some(ColumnValue::Text(s)) => {
913 data.extend_from_slice(s.as_bytes());
914 offsets.push(data.len() as u32);
915 validity.push(true);
916 stats.row_count += 1;
917 }
918 _ => {
919 offsets.push(data.len() as u32);
920 validity.push(false);
921 stats.update_null();
922 }
923 },
924 TypedColumn::Binary {
925 offsets,
926 data,
927 validity,
928 stats,
929 } => match value {
930 Some(ColumnValue::Binary(b)) => {
931 data.extend_from_slice(b);
932 offsets.push(data.len() as u32);
933 validity.push(true);
934 stats.row_count += 1;
935 }
936 _ => {
937 offsets.push(data.len() as u32);
938 validity.push(false);
939 stats.update_null();
940 }
941 },
942 TypedColumn::Bool {
943 values,
944 validity,
945 stats,
946 len,
947 } => {
948 let idx = *len;
949 *len += 1;
950 let num_words = (*len).div_ceil(64);
951 while values.len() < num_words {
952 values.push(0);
953 }
954 match value {
955 Some(ColumnValue::Bool(v)) => {
956 if *v {
957 let word = idx / 64;
958 let bit = idx % 64;
959 values[word] |= 1 << bit;
960 }
961 validity.push(true);
962 stats.row_count += 1;
963 }
964 _ => {
965 validity.push(false);
966 stats.update_null();
967 }
968 }
969 }
970 }
971 }
972
973 row_idx
974 }
975
976 pub fn memory_size(&self) -> usize {
978 self.columns.values().map(|c| c.data.memory_size()).sum()
979 }
980
981 pub fn memory_comparison(&self) -> MemoryComparison {
983 let typed_size = self.memory_size();
984 let row_count = self.row_count() as usize;
985 let column_count = self.columns.len();
986
987 let enum_size = row_count * column_count * 32;
989
990 MemoryComparison {
991 typed_bytes: typed_size,
992 enum_bytes: enum_size,
993 savings_ratio: if typed_size > 0 {
994 enum_size as f64 / typed_size as f64
995 } else {
996 1.0
997 },
998 }
999 }
1000}
1001
/// Result of `ColumnarTable::memory_comparison`.
#[derive(Debug, Clone)]
pub struct MemoryComparison {
    /// Bytes used by the typed columnar storage.
    pub typed_bytes: usize,
    /// Estimated bytes if every cell were stored as a fixed-size enum value.
    pub enum_bytes: usize,
    /// `enum_bytes / typed_bytes`, or `1.0` when `typed_bytes` is zero.
    pub savings_ratio: f64,
}
1009
/// A single cell value passed to `ColumnarTable::insert_row`.
///
/// A variant that does not match the target column's storage type (including
/// `Null`) is stored as a null row.
#[derive(Debug, Clone)]
pub enum ColumnValue {
    Null,
    Int64(i64),
    UInt64(u64),
    Float64(f64),
    Text(String),
    Binary(Vec<u8>),
    Bool(bool),
}
1021
1022#[derive(Debug, Default)]
1024pub struct ColumnarStore {
1025 tables: HashMap<String, ColumnarTable>,
1027}
1028
1029impl ColumnarStore {
1030 pub fn new() -> Self {
1032 Self {
1033 tables: HashMap::new(),
1034 }
1035 }
1036
1037 pub fn create_table(&mut self, name: impl Into<String>) -> &mut ColumnarTable {
1039 let name = name.into();
1040 self.tables
1041 .entry(name.clone())
1042 .or_insert_with(|| ColumnarTable::new(name))
1043 }
1044
1045 pub fn get_table(&self, name: &str) -> Option<&ColumnarTable> {
1047 self.tables.get(name)
1048 }
1049
1050 pub fn get_table_mut(&mut self, name: &str) -> Option<&mut ColumnarTable> {
1052 self.tables.get_mut(name)
1053 }
1054
1055 pub fn drop_table(&mut self, name: &str) -> bool {
1057 self.tables.remove(name).is_some()
1058 }
1059
1060 pub fn memory_size(&self) -> usize {
1062 self.tables.values().map(|t| t.memory_size()).sum()
1063 }
1064}
1065
#[cfg(test)]
mod tests {
    use super::*;

    /// Toggling a row between null and valid keeps the null count in step.
    #[test]
    fn test_validity_bitmap() {
        let mut bitmap = ValidityBitmap::new_all_valid(10);
        assert_eq!(bitmap.len(), 10);
        assert_eq!(bitmap.null_count(), 0);
        assert!(bitmap.is_valid(0));
        assert!(bitmap.is_valid(9));

        bitmap.set_null(5);
        assert_eq!(bitmap.null_count(), 1);
        assert!(!bitmap.is_valid(5));

        bitmap.set_valid(5);
        assert_eq!(bitmap.null_count(), 0);
        assert!(bitmap.is_valid(5));
    }

    /// Int64 columns round-trip values and nulls and sum only valid rows.
    #[test]
    fn test_int64_column() {
        let mut col = TypedColumn::new_int64();
        col.push_i64(Some(100));
        col.push_i64(Some(200));
        col.push_i64(None);
        col.push_i64(Some(300));

        assert_eq!(col.len(), 4);
        assert_eq!(col.get_i64(0), Some(100));
        assert_eq!(col.get_i64(1), Some(200));
        assert_eq!(col.get_i64(2), None);
        assert_eq!(col.get_i64(3), Some(300));
        assert!(col.is_null(2));

        assert_eq!(col.sum_i64(), 600);
    }

    /// Text columns round-trip strings and nulls.
    #[test]
    fn test_text_column() {
        let mut col = TypedColumn::new_text();
        col.push_text(Some("hello"));
        col.push_text(Some("world"));
        col.push_text(None);
        col.push_text(Some("test"));

        assert_eq!(col.len(), 4);
        assert_eq!(col.get_text(0), Some("hello"));
        assert_eq!(col.get_text(1), Some("world"));
        assert_eq!(col.get_text(2), None);
        assert_eq!(col.get_text(3), Some("test"));
    }

    /// Bool columns round-trip values and stay bit-packed.
    #[test]
    fn test_bool_column() {
        let mut col = TypedColumn::new_bool();
        col.push_bool(Some(true));
        col.push_bool(Some(false));
        col.push_bool(None);
        col.push_bool(Some(true));

        assert_eq!(col.len(), 4);
        assert_eq!(col.get_bool(0), Some(true));
        assert_eq!(col.get_bool(1), Some(false));
        assert_eq!(col.get_bool(2), None);
        assert_eq!(col.get_bool(3), Some(true));

        // One value word plus one validity word: 16 bytes for four rows.
        assert!(col.memory_size() < 32);
    }

    /// Rows inserted into a table are retrievable by primary key and index.
    #[test]
    fn test_columnar_table() {
        let mut table = ColumnarTable::new("users");
        table.add_column("id", ColumnType::Int64);
        table.add_column("name", ColumnType::Text);
        table.add_column("active", ColumnType::Bool);
        table.set_primary_key("id");

        let mut row1 = HashMap::new();
        row1.insert("id".to_string(), ColumnValue::Int64(1));
        row1.insert("name".to_string(), ColumnValue::Text("Alice".to_string()));
        row1.insert("active".to_string(), ColumnValue::Bool(true));
        table.insert_row(&row1);

        let mut row2 = HashMap::new();
        row2.insert("id".to_string(), ColumnValue::Int64(2));
        row2.insert("name".to_string(), ColumnValue::Text("Bob".to_string()));
        row2.insert("active".to_string(), ColumnValue::Bool(false));
        table.insert_row(&row2);

        assert_eq!(table.row_count(), 2);
        assert_eq!(table.get_by_pk(1), Some(0));
        assert_eq!(table.get_by_pk(2), Some(1));
        assert_eq!(table.get_by_pk(3), None);

        let id_col = table.get_column("id").unwrap();
        assert_eq!(id_col.data.get_i64(0), Some(1));
        assert_eq!(id_col.data.get_i64(1), Some(2));
    }

    /// The typed layout is at least 3x smaller than the enum estimate.
    #[test]
    fn test_memory_savings() {
        let mut table = ColumnarTable::new("test");
        table.add_column("id", ColumnType::Int64);
        table.add_column("value", ColumnType::Float64);
        table.add_column("flag", ColumnType::Bool);

        for i in 0..1000 {
            let mut row = HashMap::new();
            row.insert("id".to_string(), ColumnValue::Int64(i));
            row.insert("value".to_string(), ColumnValue::Float64(i as f64 * 1.5));
            row.insert("flag".to_string(), ColumnValue::Bool(i % 2 == 0));
            table.insert_row(&row);
        }

        let comparison = table.memory_comparison();

        assert!(
            comparison.savings_ratio > 3.0,
            "Expected 3x+ savings, got {:.2}x",
            comparison.savings_ratio
        );
    }

    /// Summing a dense column matches the arithmetic series.
    #[test]
    fn test_simd_sum() {
        let mut col = TypedColumn::new_int64();
        for i in 0..10000 {
            col.push_i64(Some(i));
        }

        let sum = col.sum_i64();
        let expected: i64 = (0..10000).sum();
        assert_eq!(sum, expected);
    }

    /// Tables can be created, looked up and dropped.
    #[test]
    fn test_columnar_store() {
        let mut store = ColumnarStore::new();

        {
            let table = store.create_table("users");
            table.add_column("id", ColumnType::Int64);
            table.add_column("name", ColumnType::Text);
        }

        assert!(store.get_table("users").is_some());
        assert!(store.get_table("orders").is_none());

        store.drop_table("users");
        assert!(store.get_table("users").is_none());
    }

    /// Statistics track min, max, null count and row count.
    #[test]
    fn test_column_stats() {
        let mut col = TypedColumn::new_int64();
        col.push_i64(Some(10));
        col.push_i64(Some(50));
        col.push_i64(None);
        col.push_i64(Some(30));
        col.push_i64(Some(20));

        let stats = col.stats();
        assert_eq!(stats.min_i64, Some(10));
        assert_eq!(stats.max_i64, Some(50));
        assert_eq!(stats.null_count, 1);
        assert_eq!(stats.row_count, 5);
    }

    /// `value_at` maps each column type to the matching `SochValue` variant.
    #[test]
    fn test_typed_column_value_at() {
        use crate::SochValue;

        let mut col = TypedColumn::new_int64();
        col.push_i64(Some(42));
        col.push_i64(None);
        col.push_i64(Some(-7));
        assert_eq!(col.value_at(0), SochValue::Int(42));
        assert_eq!(col.value_at(1), SochValue::Null);
        assert_eq!(col.value_at(2), SochValue::Int(-7));
        assert_eq!(col.value_at(99), SochValue::Null);

        // 2.5 is exactly representable and does not trip clippy's
        // `approx_constant` lint the way a PI-like literal does.
        let mut fcol = TypedColumn::new_float64();
        fcol.push_f64(Some(2.5));
        fcol.push_f64(None);
        assert_eq!(fcol.value_at(0), SochValue::Float(2.5));
        assert_eq!(fcol.value_at(1), SochValue::Null);

        let mut tcol = TypedColumn::new_text();
        tcol.push_text(Some("hello"));
        tcol.push_text(None);
        tcol.push_text(Some("world"));
        assert_eq!(tcol.value_at(0), SochValue::Text("hello".to_string()));
        assert_eq!(tcol.value_at(1), SochValue::Null);
        assert_eq!(tcol.value_at(2), SochValue::Text("world".to_string()));

        let mut bcol = TypedColumn::new_bool();
        bcol.push_bool(Some(true));
        bcol.push_bool(Some(false));
        bcol.push_bool(None);
        assert_eq!(bcol.value_at(0), SochValue::Bool(true));
        assert_eq!(bcol.value_at(1), SochValue::Bool(false));
        assert_eq!(bcol.value_at(2), SochValue::Null);
    }
}