1use std::collections::HashMap;
41use std::sync::atomic::{AtomicU64, Ordering};
42
/// Bit-packed validity mask recording which rows of a column are non-null.
///
/// Row `i` lives in bit `i % 64` of word `i / 64`. A set bit means the row
/// holds a value; a cleared bit means the row is null.
#[derive(Debug, Clone, Default)]
pub struct ValidityBitmap {
    /// Packed validity bits, 64 rows per word.
    bits: Vec<u64>,
    /// Cached number of null rows among the first `len` rows.
    null_count: usize,
    /// Number of rows tracked by the bitmap.
    len: usize,
}

impl ValidityBitmap {
    /// Creates a bitmap of `len` rows in which every row is valid.
    pub fn new_all_valid(len: usize) -> Self {
        let mut bits = vec![u64::MAX; len.div_ceil(64)];
        // Keep the padding bits past `len` cleared so that rows appended later
        // start from a known "null" state instead of inheriting stale 1-bits.
        let used = len % 64;
        if used != 0 {
            if let Some(last) = bits.last_mut() {
                *last = (1u64 << used) - 1;
            }
        }
        Self {
            bits,
            null_count: 0,
            len,
        }
    }

    /// Creates a bitmap of `len` rows in which every row is null.
    pub fn new_all_null(len: usize) -> Self {
        Self {
            bits: vec![0; len.div_ceil(64)],
            null_count: len,
            len,
        }
    }

    /// Returns `true` when row `idx` holds a value.
    ///
    /// Out-of-range indices are reported as not valid.
    #[inline]
    pub fn is_valid(&self, idx: usize) -> bool {
        if idx >= self.len {
            return false;
        }
        (self.bits[idx / 64] >> (idx % 64)) & 1 == 1
    }

    /// Marks row `idx` as valid, adjusting the null count if it was null.
    ///
    /// Out-of-range indices are ignored.
    #[inline]
    pub fn set_valid(&mut self, idx: usize) {
        if idx >= self.len || self.is_valid(idx) {
            return;
        }
        self.bits[idx / 64] |= 1u64 << (idx % 64);
        self.null_count = self.null_count.saturating_sub(1);
    }

    /// Marks row `idx` as null, adjusting the null count if it was valid.
    ///
    /// Out-of-range indices are ignored.
    #[inline]
    pub fn set_null(&mut self, idx: usize) {
        // `is_valid` is already false for out-of-range indices.
        if !self.is_valid(idx) {
            return;
        }
        self.bits[idx / 64] &= !(1u64 << (idx % 64));
        self.null_count = self.null_count.saturating_add(1);
    }

    /// Appends one row with the given validity.
    ///
    /// Only a null row changes `null_count`; appending a valid row must not
    /// touch the count of nulls already recorded.
    pub fn push(&mut self, valid: bool) {
        let idx = self.len;
        self.len += 1;

        let word = idx / 64;
        if word >= self.bits.len() {
            self.bits.resize(word + 1, 0);
        }

        let mask = 1u64 << (idx % 64);
        if valid {
            self.bits[word] |= mask;
        } else {
            // Clear explicitly: the slot may hold a stale bit.
            self.bits[word] &= !mask;
            self.null_count += 1;
        }
    }

    /// Number of null rows.
    pub fn null_count(&self) -> usize {
        self.null_count
    }

    /// Number of rows tracked.
    pub fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` when the bitmap tracks no rows.
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }
}
144
/// Running summary statistics for a single column.
///
/// The update methods in this block maintain the min/max pairs, `null_count`
/// and `row_count`; nothing here writes `distinct_count`.
#[derive(Debug, Clone, Default)]
pub struct ColumnStats {
    /// Smallest value seen by `update_i64`, if any.
    pub min_i64: Option<i64>,
    /// Largest value seen by `update_i64`, if any.
    pub max_i64: Option<i64>,
    /// Smallest value seen by `update_f64`, if any.
    pub min_f64: Option<f64>,
    /// Largest value seen by `update_f64`, if any.
    pub max_f64: Option<f64>,
    /// Distinct-value counter (not maintained by the methods below).
    pub distinct_count: u64,
    /// Number of nulls recorded via `update_null`.
    pub null_count: u64,
    /// Number of rows recorded by any of the update methods.
    pub row_count: u64,
}

impl ColumnStats {
    /// Folds an integer value into the integer min/max and counts the row.
    pub fn update_i64(&mut self, value: i64) {
        let lowest = match self.min_i64 {
            Some(current) => current.min(value),
            None => value,
        };
        let highest = match self.max_i64 {
            Some(current) => current.max(value),
            None => value,
        };
        self.min_i64 = Some(lowest);
        self.max_i64 = Some(highest);
        self.row_count += 1;
    }

    /// Folds a float value into the float min/max and counts the row.
    pub fn update_f64(&mut self, value: f64) {
        let lowest = match self.min_f64 {
            Some(current) => current.min(value),
            None => value,
        };
        let highest = match self.max_f64 {
            Some(current) => current.max(value),
            None => value,
        };
        self.min_f64 = Some(lowest);
        self.max_f64 = Some(highest);
        self.row_count += 1;
    }

    /// Counts one null row.
    pub fn update_null(&mut self) {
        self.row_count += 1;
        self.null_count += 1;
    }
}
182
183#[derive(Debug, Clone)]
185pub enum TypedColumn {
186 Int64 {
188 values: Vec<i64>,
189 validity: ValidityBitmap,
190 stats: ColumnStats,
191 },
192 UInt64 {
194 values: Vec<u64>,
195 validity: ValidityBitmap,
196 stats: ColumnStats,
197 },
198 Float64 {
200 values: Vec<f64>,
201 validity: ValidityBitmap,
202 stats: ColumnStats,
203 },
204 Text {
206 offsets: Vec<u32>,
208 data: Vec<u8>,
210 validity: ValidityBitmap,
211 stats: ColumnStats,
212 },
213 Binary {
215 offsets: Vec<u32>,
216 data: Vec<u8>,
217 validity: ValidityBitmap,
218 stats: ColumnStats,
219 },
220 Bool {
222 values: Vec<u64>,
224 validity: ValidityBitmap,
225 stats: ColumnStats,
226 len: usize,
227 },
228}
229
230impl TypedColumn {
231 pub fn new_int64() -> Self {
233 TypedColumn::Int64 {
234 values: Vec::new(),
235 validity: ValidityBitmap::default(),
236 stats: ColumnStats::default(),
237 }
238 }
239
240 pub fn new_uint64() -> Self {
242 TypedColumn::UInt64 {
243 values: Vec::new(),
244 validity: ValidityBitmap::default(),
245 stats: ColumnStats::default(),
246 }
247 }
248
249 pub fn new_float64() -> Self {
251 TypedColumn::Float64 {
252 values: Vec::new(),
253 validity: ValidityBitmap::default(),
254 stats: ColumnStats::default(),
255 }
256 }
257
258 pub fn new_text() -> Self {
260 TypedColumn::Text {
261 offsets: vec![0], data: Vec::new(),
263 validity: ValidityBitmap::default(),
264 stats: ColumnStats::default(),
265 }
266 }
267
268 pub fn new_binary() -> Self {
270 TypedColumn::Binary {
271 offsets: vec![0],
272 data: Vec::new(),
273 validity: ValidityBitmap::default(),
274 stats: ColumnStats::default(),
275 }
276 }
277
278 pub fn new_bool() -> Self {
280 TypedColumn::Bool {
281 values: Vec::new(),
282 validity: ValidityBitmap::default(),
283 stats: ColumnStats::default(),
284 len: 0,
285 }
286 }
287
288 pub fn len(&self) -> usize {
290 match self {
291 TypedColumn::Int64 { values, .. } => values.len(),
292 TypedColumn::UInt64 { values, .. } => values.len(),
293 TypedColumn::Float64 { values, .. } => values.len(),
294 TypedColumn::Text { offsets, .. } => offsets.len().saturating_sub(1),
295 TypedColumn::Binary { offsets, .. } => offsets.len().saturating_sub(1),
296 TypedColumn::Bool { len, .. } => *len,
297 }
298 }
299
300 pub fn is_empty(&self) -> bool {
302 self.len() == 0
303 }
304
305 pub fn push_i64(&mut self, value: Option<i64>) {
307 if let TypedColumn::Int64 {
308 values,
309 validity,
310 stats,
311 } = self
312 {
313 match value {
314 Some(v) => {
315 values.push(v);
316 validity.push(true);
317 stats.update_i64(v);
318 }
319 None => {
320 values.push(0); validity.push(false);
322 stats.update_null();
323 }
324 }
325 }
326 }
327
328 pub fn push_u64(&mut self, value: Option<u64>) {
330 if let TypedColumn::UInt64 {
331 values,
332 validity,
333 stats,
334 } = self
335 {
336 match value {
337 Some(v) => {
338 values.push(v);
339 validity.push(true);
340 stats.update_i64(v as i64);
341 }
342 None => {
343 values.push(0);
344 validity.push(false);
345 stats.update_null();
346 }
347 }
348 }
349 }
350
351 pub fn push_f64(&mut self, value: Option<f64>) {
353 if let TypedColumn::Float64 {
354 values,
355 validity,
356 stats,
357 } = self
358 {
359 match value {
360 Some(v) => {
361 values.push(v);
362 validity.push(true);
363 stats.update_f64(v);
364 }
365 None => {
366 values.push(0.0);
367 validity.push(false);
368 stats.update_null();
369 }
370 }
371 }
372 }
373
374 pub fn push_text(&mut self, value: Option<&str>) {
376 if let TypedColumn::Text {
377 offsets,
378 data,
379 validity,
380 stats,
381 } = self
382 {
383 match value {
384 Some(s) => {
385 data.extend_from_slice(s.as_bytes());
386 offsets.push(data.len() as u32);
387 validity.push(true);
388 stats.row_count += 1;
389 }
390 None => {
391 offsets.push(data.len() as u32);
392 validity.push(false);
393 stats.update_null();
394 }
395 }
396 }
397 }
398
399 pub fn push_binary(&mut self, value: Option<&[u8]>) {
401 if let TypedColumn::Binary {
402 offsets,
403 data,
404 validity,
405 stats,
406 } = self
407 {
408 match value {
409 Some(b) => {
410 data.extend_from_slice(b);
411 offsets.push(data.len() as u32);
412 validity.push(true);
413 stats.row_count += 1;
414 }
415 None => {
416 offsets.push(data.len() as u32);
417 validity.push(false);
418 stats.update_null();
419 }
420 }
421 }
422 }
423
424 pub fn push_bool(&mut self, value: Option<bool>) {
426 if let TypedColumn::Bool {
427 values,
428 validity,
429 stats,
430 len,
431 } = self
432 {
433 let idx = *len;
434 *len += 1;
435 let num_words = (*len).div_ceil(64);
436 while values.len() < num_words {
437 values.push(0);
438 }
439 match value {
440 Some(v) => {
441 if v {
442 let word = idx / 64;
443 let bit = idx % 64;
444 values[word] |= 1 << bit;
445 }
446 validity.push(true);
447 stats.row_count += 1;
448 }
449 None => {
450 validity.push(false);
451 stats.update_null();
452 }
453 }
454 }
455 }
456
457 pub fn get_i64(&self, idx: usize) -> Option<i64> {
459 if let TypedColumn::Int64 {
460 values, validity, ..
461 } = self
462 && idx < values.len()
463 && validity.is_valid(idx)
464 {
465 return Some(values[idx]);
466 }
467 None
468 }
469
470 pub fn get_u64(&self, idx: usize) -> Option<u64> {
472 if let TypedColumn::UInt64 {
473 values, validity, ..
474 } = self
475 && idx < values.len()
476 && validity.is_valid(idx)
477 {
478 return Some(values[idx]);
479 }
480 None
481 }
482
483 pub fn get_f64(&self, idx: usize) -> Option<f64> {
485 if let TypedColumn::Float64 {
486 values, validity, ..
487 } = self
488 && idx < values.len()
489 && validity.is_valid(idx)
490 {
491 return Some(values[idx]);
492 }
493 None
494 }
495
496 pub fn get_text(&self, idx: usize) -> Option<&str> {
498 if let TypedColumn::Text {
499 offsets,
500 data,
501 validity,
502 ..
503 } = self
504 && idx + 1 < offsets.len()
505 && validity.is_valid(idx)
506 {
507 let start = offsets[idx] as usize;
508 let end = offsets[idx + 1] as usize;
509 return std::str::from_utf8(&data[start..end]).ok();
510 }
511 None
512 }
513
514 pub fn get_binary(&self, idx: usize) -> Option<&[u8]> {
516 if let TypedColumn::Binary {
517 offsets,
518 data,
519 validity,
520 ..
521 } = self
522 && idx + 1 < offsets.len()
523 && validity.is_valid(idx)
524 {
525 let start = offsets[idx] as usize;
526 let end = offsets[idx + 1] as usize;
527 return Some(&data[start..end]);
528 }
529 None
530 }
531
532 pub fn get_bool(&self, idx: usize) -> Option<bool> {
534 if let TypedColumn::Bool {
535 values,
536 validity,
537 len,
538 ..
539 } = self
540 && idx < *len
541 && validity.is_valid(idx)
542 {
543 let word = idx / 64;
544 let bit = idx % 64;
545 return Some((values[word] >> bit) & 1 == 1);
546 }
547 None
548 }
549
550 pub fn is_null(&self, idx: usize) -> bool {
552 match self {
553 TypedColumn::Int64 { validity, .. } => !validity.is_valid(idx),
554 TypedColumn::UInt64 { validity, .. } => !validity.is_valid(idx),
555 TypedColumn::Float64 { validity, .. } => !validity.is_valid(idx),
556 TypedColumn::Text { validity, .. } => !validity.is_valid(idx),
557 TypedColumn::Binary { validity, .. } => !validity.is_valid(idx),
558 TypedColumn::Bool { validity, .. } => !validity.is_valid(idx),
559 }
560 }
561
562 pub fn stats(&self) -> &ColumnStats {
564 match self {
565 TypedColumn::Int64 { stats, .. } => stats,
566 TypedColumn::UInt64 { stats, .. } => stats,
567 TypedColumn::Float64 { stats, .. } => stats,
568 TypedColumn::Text { stats, .. } => stats,
569 TypedColumn::Binary { stats, .. } => stats,
570 TypedColumn::Bool { stats, .. } => stats,
571 }
572 }
573
574 #[inline]
576 pub fn sum_i64(&self) -> i64 {
577 if let TypedColumn::Int64 {
578 values, validity, ..
579 } = self
580 {
581 if validity.null_count() == 0 {
583 values.iter().sum()
584 } else {
585 values
587 .iter()
588 .enumerate()
589 .filter(|(i, _)| validity.is_valid(*i))
590 .map(|(_, v)| *v)
591 .sum()
592 }
593 } else {
594 0
595 }
596 }
597
598 #[inline]
600 pub fn sum_f64(&self) -> f64 {
601 if let TypedColumn::Float64 {
602 values, validity, ..
603 } = self
604 {
605 if validity.null_count() == 0 {
606 values.iter().sum()
607 } else {
608 values
609 .iter()
610 .enumerate()
611 .filter(|(i, _)| validity.is_valid(*i))
612 .map(|(_, v)| *v)
613 .sum()
614 }
615 } else {
616 0.0
617 }
618 }
619
620 pub fn memory_size(&self) -> usize {
622 match self {
623 TypedColumn::Int64 {
624 values, validity, ..
625 } => values.len() * 8 + validity.bits.len() * 8,
626 TypedColumn::UInt64 {
627 values, validity, ..
628 } => values.len() * 8 + validity.bits.len() * 8,
629 TypedColumn::Float64 {
630 values, validity, ..
631 } => values.len() * 8 + validity.bits.len() * 8,
632 TypedColumn::Text {
633 offsets,
634 data,
635 validity,
636 ..
637 } => offsets.len() * 4 + data.len() + validity.bits.len() * 8,
638 TypedColumn::Binary {
639 offsets,
640 data,
641 validity,
642 ..
643 } => offsets.len() * 4 + data.len() + validity.bits.len() * 8,
644 TypedColumn::Bool {
645 values, validity, ..
646 } => values.len() * 8 + validity.bits.len() * 8,
647 }
648 }
649
650 pub fn value_at(&self, idx: usize) -> crate::SochValue {
656 use crate::SochValue;
657 match self {
658 TypedColumn::Int64 {
659 values, validity, ..
660 } => {
661 if idx < values.len() && validity.is_valid(idx) {
662 SochValue::Int(values[idx])
663 } else {
664 SochValue::Null
665 }
666 }
667 TypedColumn::UInt64 {
668 values, validity, ..
669 } => {
670 if idx < values.len() && validity.is_valid(idx) {
671 SochValue::UInt(values[idx])
672 } else {
673 SochValue::Null
674 }
675 }
676 TypedColumn::Float64 {
677 values, validity, ..
678 } => {
679 if idx < values.len() && validity.is_valid(idx) {
680 SochValue::Float(values[idx])
681 } else {
682 SochValue::Null
683 }
684 }
685 TypedColumn::Text {
686 offsets,
687 data,
688 validity,
689 ..
690 } => {
691 if idx + 1 < offsets.len() && validity.is_valid(idx) {
692 let start = offsets[idx] as usize;
693 let end = offsets[idx + 1] as usize;
694 std::str::from_utf8(&data[start..end])
695 .map(|s| SochValue::Text(s.to_owned()))
696 .unwrap_or(SochValue::Null)
697 } else {
698 SochValue::Null
699 }
700 }
701 TypedColumn::Binary {
702 offsets,
703 data,
704 validity,
705 ..
706 } => {
707 if idx + 1 < offsets.len() && validity.is_valid(idx) {
708 let start = offsets[idx] as usize;
709 let end = offsets[idx + 1] as usize;
710 SochValue::Binary(data[start..end].to_vec())
711 } else {
712 SochValue::Null
713 }
714 }
715 TypedColumn::Bool {
716 values,
717 validity,
718 len,
719 ..
720 } => {
721 if idx < *len && validity.is_valid(idx) {
722 let word = idx / 64;
723 let bit = idx % 64;
724 SochValue::Bool((values[word] >> bit) & 1 == 1)
725 } else {
726 SochValue::Null
727 }
728 }
729 }
730 }
731}
732
733#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
735pub enum ColumnType {
736 Int64,
737 UInt64,
738 Float64,
739 Text,
740 Binary,
741 Bool,
742}
743
744impl ColumnType {
745 pub fn create_column(&self) -> TypedColumn {
747 match self {
748 ColumnType::Int64 => TypedColumn::new_int64(),
749 ColumnType::UInt64 => TypedColumn::new_uint64(),
750 ColumnType::Float64 => TypedColumn::new_float64(),
751 ColumnType::Text => TypedColumn::new_text(),
752 ColumnType::Binary => TypedColumn::new_binary(),
753 ColumnType::Bool => TypedColumn::new_bool(),
754 }
755 }
756}
757
758#[derive(Debug, Clone)]
760pub struct ColumnChunk {
761 pub name: String,
763 pub column_type: ColumnType,
765 pub data: TypedColumn,
767}
768
769impl ColumnChunk {
770 pub fn new(name: impl Into<String>, column_type: ColumnType) -> Self {
772 Self {
773 name: name.into(),
774 column_type,
775 data: column_type.create_column(),
776 }
777 }
778
779 pub fn stats(&self) -> &ColumnStats {
781 self.data.stats()
782 }
783}
784
785#[derive(Debug)]
787pub struct ColumnarTable {
788 pub name: String,
790 columns: HashMap<String, ColumnChunk>,
792 column_order: Vec<String>,
794 primary_key: Option<String>,
796 pk_index: std::collections::BTreeMap<i64, u32>,
798 row_count: AtomicU64,
800}
801
802impl Clone for ColumnarTable {
803 fn clone(&self) -> Self {
804 Self {
805 name: self.name.clone(),
806 columns: self.columns.clone(),
807 column_order: self.column_order.clone(),
808 primary_key: self.primary_key.clone(),
809 pk_index: self.pk_index.clone(),
810 row_count: AtomicU64::new(self.row_count.load(std::sync::atomic::Ordering::Relaxed)),
811 }
812 }
813}
814
815impl ColumnarTable {
816 pub fn new(name: impl Into<String>) -> Self {
818 Self {
819 name: name.into(),
820 columns: HashMap::new(),
821 column_order: Vec::new(),
822 primary_key: None,
823 pk_index: std::collections::BTreeMap::new(),
824 row_count: AtomicU64::new(0),
825 }
826 }
827
828 pub fn add_column(&mut self, name: impl Into<String>, column_type: ColumnType) -> &mut Self {
830 let name = name.into();
831 self.column_order.push(name.clone());
832 self.columns
833 .insert(name.clone(), ColumnChunk::new(name, column_type));
834 self
835 }
836
837 pub fn set_primary_key(&mut self, column: impl Into<String>) -> &mut Self {
839 self.primary_key = Some(column.into());
840 self
841 }
842
843 pub fn row_count(&self) -> u64 {
845 self.row_count.load(Ordering::Relaxed)
846 }
847
848 pub fn get_column(&self, name: &str) -> Option<&ColumnChunk> {
850 self.columns.get(name)
851 }
852
853 pub fn get_column_mut(&mut self, name: &str) -> Option<&mut ColumnChunk> {
855 self.columns.get_mut(name)
856 }
857
858 pub fn get_by_pk(&self, pk: i64) -> Option<u32> {
860 self.pk_index.get(&pk).copied()
861 }
862
863 pub fn insert_row(&mut self, values: &HashMap<String, ColumnValue>) -> u32 {
865 let row_idx = self.row_count.fetch_add(1, Ordering::Relaxed) as u32;
866
867 for col_name in &self.column_order {
868 let chunk = self.columns.get_mut(col_name).unwrap();
869 let value = values.get(col_name);
870
871 match &mut chunk.data {
872 TypedColumn::Int64 {
873 values,
874 validity,
875 stats,
876 } => {
877 match value {
878 Some(ColumnValue::Int64(v)) => {
879 values.push(*v);
880 validity.push(true);
881 stats.update_i64(*v);
882
883 if self.primary_key.as_ref() == Some(col_name) {
885 self.pk_index.insert(*v, row_idx);
886 }
887 }
888 _ => {
889 values.push(0);
890 validity.push(false);
891 stats.update_null();
892 }
893 }
894 }
895 TypedColumn::UInt64 {
896 values,
897 validity,
898 stats,
899 } => match value {
900 Some(ColumnValue::UInt64(v)) => {
901 values.push(*v);
902 validity.push(true);
903 stats.update_i64(*v as i64);
904 }
905 _ => {
906 values.push(0);
907 validity.push(false);
908 stats.update_null();
909 }
910 },
911 TypedColumn::Float64 {
912 values,
913 validity,
914 stats,
915 } => match value {
916 Some(ColumnValue::Float64(v)) => {
917 values.push(*v);
918 validity.push(true);
919 stats.update_f64(*v);
920 }
921 _ => {
922 values.push(0.0);
923 validity.push(false);
924 stats.update_null();
925 }
926 },
927 TypedColumn::Text {
928 offsets,
929 data,
930 validity,
931 stats,
932 } => match value {
933 Some(ColumnValue::Text(s)) => {
934 data.extend_from_slice(s.as_bytes());
935 offsets.push(data.len() as u32);
936 validity.push(true);
937 stats.row_count += 1;
938 }
939 _ => {
940 offsets.push(data.len() as u32);
941 validity.push(false);
942 stats.update_null();
943 }
944 },
945 TypedColumn::Binary {
946 offsets,
947 data,
948 validity,
949 stats,
950 } => match value {
951 Some(ColumnValue::Binary(b)) => {
952 data.extend_from_slice(b);
953 offsets.push(data.len() as u32);
954 validity.push(true);
955 stats.row_count += 1;
956 }
957 _ => {
958 offsets.push(data.len() as u32);
959 validity.push(false);
960 stats.update_null();
961 }
962 },
963 TypedColumn::Bool {
964 values,
965 validity,
966 stats,
967 len,
968 } => {
969 let idx = *len;
970 *len += 1;
971 let num_words = (*len).div_ceil(64);
972 while values.len() < num_words {
973 values.push(0);
974 }
975 match value {
976 Some(ColumnValue::Bool(v)) => {
977 if *v {
978 let word = idx / 64;
979 let bit = idx % 64;
980 values[word] |= 1 << bit;
981 }
982 validity.push(true);
983 stats.row_count += 1;
984 }
985 _ => {
986 validity.push(false);
987 stats.update_null();
988 }
989 }
990 }
991 }
992 }
993
994 row_idx
995 }
996
997 pub fn memory_size(&self) -> usize {
999 self.columns.values().map(|c| c.data.memory_size()).sum()
1000 }
1001
1002 pub fn memory_comparison(&self) -> MemoryComparison {
1004 let typed_size = self.memory_size();
1005 let row_count = self.row_count() as usize;
1006 let column_count = self.columns.len();
1007
1008 let enum_size = row_count * column_count * 32;
1010
1011 MemoryComparison {
1012 typed_bytes: typed_size,
1013 enum_bytes: enum_size,
1014 savings_ratio: if typed_size > 0 {
1015 enum_size as f64 / typed_size as f64
1016 } else {
1017 1.0
1018 },
1019 }
1020 }
1021}
1022
/// Result of comparing columnar storage against a per-cell size estimate.
#[derive(Debug, Clone)]
pub struct MemoryComparison {
    /// Bytes used by the typed columnar layout.
    pub typed_bytes: usize,
    /// Bytes the fixed-width per-cell estimate would need.
    pub enum_bytes: usize,
    /// `enum_bytes` divided by `typed_bytes`.
    pub savings_ratio: f64,
}
1030
/// A single dynamically typed cell handed to row insertion.
///
/// Derives `PartialEq` so cells can be compared directly in callers and
/// tests (`Eq` is not available because of the `f64` payload).
#[derive(Debug, Clone, PartialEq)]
pub enum ColumnValue {
    /// Explicit null.
    Null,
    /// Signed 64-bit integer.
    Int64(i64),
    /// Unsigned 64-bit integer.
    UInt64(u64),
    /// 64-bit float.
    Float64(f64),
    /// Owned string.
    Text(String),
    /// Owned byte string.
    Binary(Vec<u8>),
    /// Boolean.
    Bool(bool),
}
1042
1043#[derive(Debug, Default)]
1045pub struct ColumnarStore {
1046 tables: HashMap<String, ColumnarTable>,
1048}
1049
1050impl ColumnarStore {
1051 pub fn new() -> Self {
1053 Self {
1054 tables: HashMap::new(),
1055 }
1056 }
1057
1058 pub fn create_table(&mut self, name: impl Into<String>) -> &mut ColumnarTable {
1060 let name = name.into();
1061 self.tables
1062 .entry(name.clone())
1063 .or_insert_with(|| ColumnarTable::new(name))
1064 }
1065
1066 pub fn get_table(&self, name: &str) -> Option<&ColumnarTable> {
1068 self.tables.get(name)
1069 }
1070
1071 pub fn get_table_mut(&mut self, name: &str) -> Option<&mut ColumnarTable> {
1073 self.tables.get_mut(name)
1074 }
1075
1076 pub fn drop_table(&mut self, name: &str) -> bool {
1078 self.tables.remove(name).is_some()
1079 }
1080
1081 pub fn memory_size(&self) -> usize {
1083 self.tables.values().map(|t| t.memory_size()).sum()
1084 }
1085}
1086
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a row map from `(column name, value)` pairs.
    fn row(cells: Vec<(&str, ColumnValue)>) -> HashMap<String, ColumnValue> {
        cells
            .into_iter()
            .map(|(column, value)| (column.to_string(), value))
            .collect()
    }

    /// Flipping a bit to null and back keeps the null count in sync.
    #[test]
    fn test_validity_bitmap() {
        let mut bitmap = ValidityBitmap::new_all_valid(10);
        assert_eq!(bitmap.len(), 10);
        assert_eq!(bitmap.null_count(), 0);
        assert!(bitmap.is_valid(0));
        assert!(bitmap.is_valid(9));

        bitmap.set_null(5);
        assert_eq!(bitmap.null_count(), 1);
        assert!(!bitmap.is_valid(5));

        bitmap.set_valid(5);
        assert_eq!(bitmap.null_count(), 0);
        assert!(bitmap.is_valid(5));
    }

    /// Integer columns return pushed values, report nulls and sum valid rows.
    #[test]
    fn test_int64_column() {
        let mut col = TypedColumn::new_int64();
        for cell in [Some(100), Some(200), None, Some(300)] {
            col.push_i64(cell);
        }

        assert_eq!(col.len(), 4);
        assert_eq!(col.get_i64(0), Some(100));
        assert_eq!(col.get_i64(1), Some(200));
        assert_eq!(col.get_i64(2), None);
        assert_eq!(col.get_i64(3), Some(300));
        assert!(col.is_null(2));

        assert_eq!(col.sum_i64(), 600);
    }

    /// Text columns round-trip strings and nulls.
    #[test]
    fn test_text_column() {
        let mut col = TypedColumn::new_text();
        for cell in [Some("hello"), Some("world"), None, Some("test")] {
            col.push_text(cell);
        }

        assert_eq!(col.len(), 4);
        assert_eq!(col.get_text(0), Some("hello"));
        assert_eq!(col.get_text(1), Some("world"));
        assert_eq!(col.get_text(2), None);
        assert_eq!(col.get_text(3), Some("test"));
    }

    /// Bool columns round-trip values and stay bit-packed.
    #[test]
    fn test_bool_column() {
        let mut col = TypedColumn::new_bool();
        for cell in [Some(true), Some(false), None, Some(true)] {
            col.push_bool(cell);
        }

        assert_eq!(col.len(), 4);
        assert_eq!(col.get_bool(0), Some(true));
        assert_eq!(col.get_bool(1), Some(false));
        assert_eq!(col.get_bool(2), None);
        assert_eq!(col.get_bool(3), Some(true));

        assert!(col.memory_size() < 32);
    }

    /// Rows land in their columns and the primary-key index finds them.
    #[test]
    fn test_columnar_table() {
        let mut table = ColumnarTable::new("users");
        table.add_column("id", ColumnType::Int64);
        table.add_column("name", ColumnType::Text);
        table.add_column("active", ColumnType::Bool);
        table.set_primary_key("id");

        let row1 = row(vec![
            ("id", ColumnValue::Int64(1)),
            ("name", ColumnValue::Text("Alice".to_string())),
            ("active", ColumnValue::Bool(true)),
        ]);
        table.insert_row(&row1);

        let row2 = row(vec![
            ("id", ColumnValue::Int64(2)),
            ("name", ColumnValue::Text("Bob".to_string())),
            ("active", ColumnValue::Bool(false)),
        ]);
        table.insert_row(&row2);

        assert_eq!(table.row_count(), 2);
        assert_eq!(table.get_by_pk(1), Some(0));
        assert_eq!(table.get_by_pk(2), Some(1));
        assert_eq!(table.get_by_pk(3), None);

        let id_col = table.get_column("id").unwrap();
        assert_eq!(id_col.data.get_i64(0), Some(1));
        assert_eq!(id_col.data.get_i64(1), Some(2));
    }

    /// The columnar layout beats the per-cell estimate by more than 3x.
    #[test]
    fn test_memory_savings() {
        let mut table = ColumnarTable::new("test");
        table.add_column("id", ColumnType::Int64);
        table.add_column("value", ColumnType::Float64);
        table.add_column("flag", ColumnType::Bool);

        for i in 0..1000 {
            let cells = row(vec![
                ("id", ColumnValue::Int64(i)),
                ("value", ColumnValue::Float64(i as f64 * 1.5)),
                ("flag", ColumnValue::Bool(i % 2 == 0)),
            ]);
            table.insert_row(&cells);
        }

        let comparison = table.memory_comparison();

        assert!(
            comparison.savings_ratio > 3.0,
            "Expected 3x+ savings, got {:.2}x",
            comparison.savings_ratio
        );
    }

    /// Summing a null-free integer column matches the plain range sum.
    #[test]
    fn test_simd_sum() {
        let mut col = TypedColumn::new_int64();
        for i in 0..10000 {
            col.push_i64(Some(i));
        }

        let expected: i64 = (0..10000).sum();
        assert_eq!(col.sum_i64(), expected);
    }

    /// Tables can be created, looked up and dropped.
    #[test]
    fn test_columnar_store() {
        let mut store = ColumnarStore::new();

        {
            let table = store.create_table("users");
            table.add_column("id", ColumnType::Int64);
            table.add_column("name", ColumnType::Text);
        }

        assert!(store.get_table("users").is_some());
        assert!(store.get_table("orders").is_none());

        store.drop_table("users");
        assert!(store.get_table("users").is_none());
    }

    /// Statistics follow min, max, null count and row count.
    #[test]
    fn test_column_stats() {
        let mut col = TypedColumn::new_int64();
        for cell in [Some(10), Some(50), None, Some(30), Some(20)] {
            col.push_i64(cell);
        }

        let stats = col.stats();
        assert_eq!(stats.min_i64, Some(10));
        assert_eq!(stats.max_i64, Some(50));
        assert_eq!(stats.null_count, 1);
        assert_eq!(stats.row_count, 5);
    }

    /// `value_at` maps each column type to the matching `SochValue`.
    #[test]
    fn test_typed_column_value_at() {
        use crate::SochValue;

        // Integers, including a null and an out-of-range index.
        let mut col = TypedColumn::new_int64();
        col.push_i64(Some(42));
        col.push_i64(None);
        col.push_i64(Some(-7));
        assert_eq!(col.value_at(0), SochValue::Int(42));
        assert_eq!(col.value_at(1), SochValue::Null);
        assert_eq!(col.value_at(2), SochValue::Int(-7));
        assert_eq!(col.value_at(99), SochValue::Null);

        // Floats.
        let mut fcol = TypedColumn::new_float64();
        fcol.push_f64(Some(3.15));
        fcol.push_f64(None);
        assert_eq!(fcol.value_at(0), SochValue::Float(3.15));
        assert_eq!(fcol.value_at(1), SochValue::Null);

        // Text.
        let mut tcol = TypedColumn::new_text();
        tcol.push_text(Some("hello"));
        tcol.push_text(None);
        tcol.push_text(Some("world"));
        assert_eq!(tcol.value_at(0), SochValue::Text("hello".to_string()));
        assert_eq!(tcol.value_at(1), SochValue::Null);
        assert_eq!(tcol.value_at(2), SochValue::Text("world".to_string()));

        // Booleans.
        let mut bcol = TypedColumn::new_bool();
        bcol.push_bool(Some(true));
        bcol.push_bool(Some(false));
        bcol.push_bool(None);
        assert_eq!(bcol.value_at(0), SochValue::Bool(true));
        assert_eq!(bcol.value_at(1), SochValue::Bool(false));
        assert_eq!(bcol.value_at(2), SochValue::Null);
    }
}