1use roaring::RoaringBitmap;
24use rustc_hash::FxHashMap;
25use std::collections::HashMap;
26
27#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
29pub struct StringId(u32);
30
31#[derive(Debug, Default)]
33pub struct StringTable {
34 string_to_id: FxHashMap<String, StringId>,
36 id_to_string: Vec<String>,
38}
39
40impl StringTable {
41 #[must_use]
43 pub fn new() -> Self {
44 Self::default()
45 }
46
47 pub fn intern(&mut self, s: &str) -> StringId {
51 if let Some(&id) = self.string_to_id.get(s) {
52 return id;
53 }
54
55 #[allow(clippy::cast_possible_truncation)]
56 let id = StringId(self.id_to_string.len() as u32);
57 self.id_to_string.push(s.to_string());
58 self.string_to_id.insert(s.to_string(), id);
59 id
60 }
61
62 #[must_use]
64 pub fn get(&self, id: StringId) -> Option<&str> {
65 self.id_to_string.get(id.0 as usize).map(String::as_str)
66 }
67
68 #[must_use]
70 pub fn get_id(&self, s: &str) -> Option<StringId> {
71 self.string_to_id.get(s).copied()
72 }
73
74 #[must_use]
76 pub fn len(&self) -> usize {
77 self.id_to_string.len()
78 }
79
80 #[must_use]
82 pub fn is_empty(&self) -> bool {
83 self.id_to_string.is_empty()
84 }
85}
86
87#[derive(Debug)]
89pub enum TypedColumn {
90 Int(Vec<Option<i64>>),
92 Float(Vec<Option<f64>>),
94 String(Vec<Option<StringId>>),
96 Bool(Vec<Option<bool>>),
98}
99
100impl TypedColumn {
101 #[must_use]
103 pub fn new_int(capacity: usize) -> Self {
104 Self::Int(Vec::with_capacity(capacity))
105 }
106
107 #[must_use]
109 pub fn new_float(capacity: usize) -> Self {
110 Self::Float(Vec::with_capacity(capacity))
111 }
112
113 #[must_use]
115 pub fn new_string(capacity: usize) -> Self {
116 Self::String(Vec::with_capacity(capacity))
117 }
118
119 #[must_use]
121 pub fn new_bool(capacity: usize) -> Self {
122 Self::Bool(Vec::with_capacity(capacity))
123 }
124
125 #[must_use]
127 pub fn len(&self) -> usize {
128 match self {
129 Self::Int(v) => v.len(),
130 Self::Float(v) => v.len(),
131 Self::String(v) => v.len(),
132 Self::Bool(v) => v.len(),
133 }
134 }
135
136 #[must_use]
138 pub fn is_empty(&self) -> bool {
139 self.len() == 0
140 }
141
142 pub fn push_null(&mut self) {
144 match self {
145 Self::Int(v) => v.push(None),
146 Self::Float(v) => v.push(None),
147 Self::String(v) => v.push(None),
148 Self::Bool(v) => v.push(None),
149 }
150 }
151}
152
153#[derive(Debug, Default)]
155pub struct ColumnStore {
156 columns: HashMap<String, TypedColumn>,
158 string_table: StringTable,
160 row_count: usize,
162}
163
164impl ColumnStore {
165 #[must_use]
167 pub fn new() -> Self {
168 Self::default()
169 }
170
171 #[must_use]
177 pub fn with_schema(fields: &[(&str, ColumnType)]) -> Self {
178 let mut store = Self::new();
179 for (name, col_type) in fields {
180 store.add_column(name, *col_type);
181 }
182 store
183 }
184
185 pub fn add_column(&mut self, name: &str, col_type: ColumnType) {
187 let column = match col_type {
188 ColumnType::Int => TypedColumn::new_int(0),
189 ColumnType::Float => TypedColumn::new_float(0),
190 ColumnType::String => TypedColumn::new_string(0),
191 ColumnType::Bool => TypedColumn::new_bool(0),
192 };
193 self.columns.insert(name.to_string(), column);
194 }
195
196 #[must_use]
198 pub fn row_count(&self) -> usize {
199 self.row_count
200 }
201
202 #[must_use]
204 pub fn string_table(&self) -> &StringTable {
205 &self.string_table
206 }
207
208 pub fn string_table_mut(&mut self) -> &mut StringTable {
210 &mut self.string_table
211 }
212
213 pub fn push_row(&mut self, values: &[(&str, ColumnValue)]) {
217 let value_map: FxHashMap<&str, &ColumnValue> =
219 values.iter().map(|(k, v)| (*k, v)).collect();
220
221 for (name, column) in &mut self.columns {
223 if let Some(value) = value_map.get(name.as_str()) {
224 match value {
225 ColumnValue::Null => column.push_null(),
226 ColumnValue::Int(v) => {
227 if let TypedColumn::Int(col) = column {
228 col.push(Some(*v));
229 } else {
230 column.push_null();
231 }
232 }
233 ColumnValue::Float(v) => {
234 if let TypedColumn::Float(col) = column {
235 col.push(Some(*v));
236 } else {
237 column.push_null();
238 }
239 }
240 ColumnValue::String(id) => {
241 if let TypedColumn::String(col) = column {
242 col.push(Some(*id));
243 } else {
244 column.push_null();
245 }
246 }
247 ColumnValue::Bool(v) => {
248 if let TypedColumn::Bool(col) = column {
249 col.push(Some(*v));
250 } else {
251 column.push_null();
252 }
253 }
254 }
255 } else {
256 column.push_null();
257 }
258 }
259
260 self.row_count += 1;
261 }
262
263 #[must_use]
265 pub fn get_column(&self, name: &str) -> Option<&TypedColumn> {
266 self.columns.get(name)
267 }
268
269 #[must_use]
273 pub fn filter_eq_int(&self, column: &str, value: i64) -> Vec<usize> {
274 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
275 return Vec::new();
276 };
277
278 col.iter()
279 .enumerate()
280 .filter_map(|(idx, v)| if *v == Some(value) { Some(idx) } else { None })
281 .collect()
282 }
283
284 #[must_use]
288 pub fn filter_eq_string(&self, column: &str, value: &str) -> Vec<usize> {
289 let Some(TypedColumn::String(col)) = self.columns.get(column) else {
290 return Vec::new();
291 };
292
293 let Some(string_id) = self.string_table.get_id(value) else {
294 return Vec::new(); };
296
297 col.iter()
298 .enumerate()
299 .filter_map(|(idx, v)| {
300 if *v == Some(string_id) {
301 Some(idx)
302 } else {
303 None
304 }
305 })
306 .collect()
307 }
308
309 #[must_use]
313 pub fn filter_gt_int(&self, column: &str, threshold: i64) -> Vec<usize> {
314 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
315 return Vec::new();
316 };
317
318 col.iter()
319 .enumerate()
320 .filter_map(|(idx, v)| match v {
321 Some(val) if *val > threshold => Some(idx),
322 _ => None,
323 })
324 .collect()
325 }
326
327 #[must_use]
329 pub fn filter_lt_int(&self, column: &str, threshold: i64) -> Vec<usize> {
330 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
331 return Vec::new();
332 };
333
334 col.iter()
335 .enumerate()
336 .filter_map(|(idx, v)| match v {
337 Some(val) if *val < threshold => Some(idx),
338 _ => None,
339 })
340 .collect()
341 }
342
343 #[must_use]
345 pub fn filter_range_int(&self, column: &str, low: i64, high: i64) -> Vec<usize> {
346 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
347 return Vec::new();
348 };
349
350 col.iter()
351 .enumerate()
352 .filter_map(|(idx, v)| match v {
353 Some(val) if *val > low && *val < high => Some(idx),
354 _ => None,
355 })
356 .collect()
357 }
358
359 #[must_use]
363 pub fn filter_in_string(&self, column: &str, values: &[&str]) -> Vec<usize> {
364 let Some(TypedColumn::String(col)) = self.columns.get(column) else {
365 return Vec::new();
366 };
367
368 let ids: Vec<StringId> = values
370 .iter()
371 .filter_map(|s| self.string_table.get_id(s))
372 .collect();
373
374 if ids.is_empty() {
375 return Vec::new();
376 }
377
378 if ids.len() > 16 {
381 let id_set: rustc_hash::FxHashSet<StringId> = ids.into_iter().collect();
382 col.iter()
383 .enumerate()
384 .filter_map(|(idx, v)| match v {
385 Some(id) if id_set.contains(id) => Some(idx),
386 _ => None,
387 })
388 .collect()
389 } else {
390 col.iter()
391 .enumerate()
392 .filter_map(|(idx, v)| match v {
393 Some(id) if ids.contains(id) => Some(idx),
394 _ => None,
395 })
396 .collect()
397 }
398 }
399
400 #[must_use]
404 pub fn count_eq_int(&self, column: &str, value: i64) -> usize {
405 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
406 return 0;
407 };
408
409 col.iter().filter(|v| **v == Some(value)).count()
410 }
411
412 #[must_use]
414 pub fn count_eq_string(&self, column: &str, value: &str) -> usize {
415 let Some(TypedColumn::String(col)) = self.columns.get(column) else {
416 return 0;
417 };
418
419 let Some(string_id) = self.string_table.get_id(value) else {
420 return 0;
421 };
422
423 col.iter().filter(|v| **v == Some(string_id)).count()
424 }
425
426 #[must_use]
435 #[allow(clippy::cast_possible_truncation)]
436 pub fn filter_eq_int_bitmap(&self, column: &str, value: i64) -> RoaringBitmap {
437 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
438 return RoaringBitmap::new();
439 };
440
441 col.iter()
442 .enumerate()
443 .filter_map(|(idx, v)| {
444 if *v == Some(value) {
445 Some(idx as u32)
446 } else {
447 None
448 }
449 })
450 .collect()
451 }
452
453 #[must_use]
455 #[allow(clippy::cast_possible_truncation)]
456 pub fn filter_eq_string_bitmap(&self, column: &str, value: &str) -> RoaringBitmap {
457 let Some(TypedColumn::String(col)) = self.columns.get(column) else {
458 return RoaringBitmap::new();
459 };
460
461 let Some(string_id) = self.string_table.get_id(value) else {
462 return RoaringBitmap::new();
463 };
464
465 col.iter()
466 .enumerate()
467 .filter_map(|(idx, v)| {
468 if *v == Some(string_id) {
469 Some(idx as u32)
470 } else {
471 None
472 }
473 })
474 .collect()
475 }
476
477 #[must_use]
479 #[allow(clippy::cast_possible_truncation)]
480 pub fn filter_range_int_bitmap(&self, column: &str, low: i64, high: i64) -> RoaringBitmap {
481 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
482 return RoaringBitmap::new();
483 };
484
485 col.iter()
486 .enumerate()
487 .filter_map(|(idx, v)| match v {
488 Some(val) if *val > low && *val < high => Some(idx as u32),
489 _ => None,
490 })
491 .collect()
492 }
493
494 #[must_use]
498 pub fn bitmap_and(a: &RoaringBitmap, b: &RoaringBitmap) -> RoaringBitmap {
499 a & b
500 }
501
502 #[must_use]
506 pub fn bitmap_or(a: &RoaringBitmap, b: &RoaringBitmap) -> RoaringBitmap {
507 a | b
508 }
509}
510
511#[derive(Debug, Clone, Copy, PartialEq, Eq)]
513pub enum ColumnType {
514 Int,
516 Float,
518 String,
520 Bool,
522}
523
524#[derive(Debug, Clone)]
526pub enum ColumnValue {
527 Int(i64),
529 Float(f64),
531 String(StringId),
533 Bool(bool),
535 Null,
537}
538
539#[cfg(test)]
540mod tests {
541 use super::*;
542
543 #[test]
548 fn test_string_table_intern() {
549 let mut table = StringTable::new();
551
552 let id1 = table.intern("hello");
554 let id2 = table.intern("world");
555 let id3 = table.intern("hello"); assert_eq!(id1, id3);
559 assert_ne!(id1, id2);
560 assert_eq!(table.len(), 2);
561 }
562
563 #[test]
564 fn test_string_table_get() {
565 let mut table = StringTable::new();
567 let id = table.intern("test");
568
569 assert_eq!(table.get(id), Some("test"));
571 }
572
573 #[test]
574 fn test_string_table_get_id() {
575 let mut table = StringTable::new();
577 table.intern("existing");
578
579 assert!(table.get_id("existing").is_some());
581 assert!(table.get_id("missing").is_none());
582 }
583
584 #[test]
589 fn test_column_store_new() {
590 let store = ColumnStore::new();
592
593 assert_eq!(store.row_count(), 0);
595 }
596
597 #[test]
598 fn test_column_store_with_schema() {
599 let store = ColumnStore::with_schema(&[
601 ("category", ColumnType::String),
602 ("price", ColumnType::Int),
603 ]);
604
605 assert!(store.get_column("category").is_some());
607 assert!(store.get_column("price").is_some());
608 assert!(store.get_column("missing").is_none());
609 }
610
611 #[test]
612 fn test_column_store_push_row() {
613 let mut store = ColumnStore::with_schema(&[
615 ("category", ColumnType::String),
616 ("price", ColumnType::Int),
617 ]);
618
619 let cat_id = store.string_table_mut().intern("tech");
620
621 store.push_row(&[
623 ("category", ColumnValue::String(cat_id)),
624 ("price", ColumnValue::Int(100)),
625 ]);
626
627 assert_eq!(store.row_count(), 1);
629 }
630
631 #[test]
636 fn test_filter_eq_int() {
637 let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
639 store.push_row(&[("price", ColumnValue::Int(100))]);
640 store.push_row(&[("price", ColumnValue::Int(200))]);
641 store.push_row(&[("price", ColumnValue::Int(100))]);
642
643 let matches = store.filter_eq_int("price", 100);
645
646 assert_eq!(matches, vec![0, 2]);
648 }
649
650 #[test]
651 fn test_filter_eq_string() {
652 let mut store = ColumnStore::with_schema(&[("category", ColumnType::String)]);
654
655 let tech_id = store.string_table_mut().intern("tech");
656 let science_id = store.string_table_mut().intern("science");
657
658 store.push_row(&[("category", ColumnValue::String(tech_id))]);
659 store.push_row(&[("category", ColumnValue::String(science_id))]);
660 store.push_row(&[("category", ColumnValue::String(tech_id))]);
661
662 let matches = store.filter_eq_string("category", "tech");
664
665 assert_eq!(matches, vec![0, 2]);
667 }
668
669 #[test]
670 fn test_filter_gt_int() {
671 let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
673 store.push_row(&[("price", ColumnValue::Int(50))]);
674 store.push_row(&[("price", ColumnValue::Int(100))]);
675 store.push_row(&[("price", ColumnValue::Int(150))]);
676
677 let matches = store.filter_gt_int("price", 75);
679
680 assert_eq!(matches, vec![1, 2]);
682 }
683
684 #[test]
685 fn test_filter_lt_int() {
686 let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
688 store.push_row(&[("price", ColumnValue::Int(50))]);
689 store.push_row(&[("price", ColumnValue::Int(100))]);
690 store.push_row(&[("price", ColumnValue::Int(150))]);
691
692 let matches = store.filter_lt_int("price", 100);
694
695 assert_eq!(matches, vec![0]);
697 }
698
699 #[test]
700 fn test_filter_range_int() {
701 let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
703 store.push_row(&[("price", ColumnValue::Int(50))]);
704 store.push_row(&[("price", ColumnValue::Int(100))]);
705 store.push_row(&[("price", ColumnValue::Int(150))]);
706 store.push_row(&[("price", ColumnValue::Int(200))]);
707
708 let matches = store.filter_range_int("price", 75, 175);
710
711 assert_eq!(matches, vec![1, 2]);
713 }
714
715 #[test]
716 fn test_filter_in_string() {
717 let mut store = ColumnStore::with_schema(&[("category", ColumnType::String)]);
719
720 let tech_id = store.string_table_mut().intern("tech");
721 let science_id = store.string_table_mut().intern("science");
722 let art_id = store.string_table_mut().intern("art");
723
724 store.push_row(&[("category", ColumnValue::String(tech_id))]);
725 store.push_row(&[("category", ColumnValue::String(science_id))]);
726 store.push_row(&[("category", ColumnValue::String(art_id))]);
727 store.push_row(&[("category", ColumnValue::String(tech_id))]);
728
729 let matches = store.filter_in_string("category", &["tech", "art"]);
731
732 assert_eq!(matches, vec![0, 2, 3]);
734 }
735
736 #[test]
737 fn test_filter_with_null_values() {
738 let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
740 store.push_row(&[("price", ColumnValue::Int(100))]);
741 store.push_row(&[("price", ColumnValue::Null)]);
742 store.push_row(&[("price", ColumnValue::Int(100))]);
743
744 let matches = store.filter_eq_int("price", 100);
746
747 assert_eq!(matches, vec![0, 2]);
749 }
750
751 #[test]
752 fn test_filter_missing_column() {
753 let store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
755
756 let matches = store.filter_eq_int("missing", 100);
758
759 assert!(matches.is_empty());
761 }
762
763 #[test]
768 fn test_count_eq_int() {
769 let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
771 store.push_row(&[("price", ColumnValue::Int(100))]);
772 store.push_row(&[("price", ColumnValue::Int(200))]);
773 store.push_row(&[("price", ColumnValue::Int(100))]);
774
775 let count = store.count_eq_int("price", 100);
777
778 assert_eq!(count, 2);
780 }
781
782 #[test]
783 fn test_count_eq_string() {
784 let mut store = ColumnStore::with_schema(&[("category", ColumnType::String)]);
786
787 let tech_id = store.string_table_mut().intern("tech");
788 let science_id = store.string_table_mut().intern("science");
789
790 store.push_row(&[("category", ColumnValue::String(tech_id))]);
791 store.push_row(&[("category", ColumnValue::String(science_id))]);
792 store.push_row(&[("category", ColumnValue::String(tech_id))]);
793
794 let count = store.count_eq_string("category", "tech");
796
797 assert_eq!(count, 2);
799 }
800
801 #[test]
806 fn test_filter_eq_int_bitmap() {
807 let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
809 store.push_row(&[("price", ColumnValue::Int(100))]);
810 store.push_row(&[("price", ColumnValue::Int(200))]);
811 store.push_row(&[("price", ColumnValue::Int(100))]);
812
813 let bitmap = store.filter_eq_int_bitmap("price", 100);
815
816 assert!(bitmap.contains(0));
818 assert!(!bitmap.contains(1));
819 assert!(bitmap.contains(2));
820 assert_eq!(bitmap.len(), 2);
821 }
822
823 #[test]
824 fn test_filter_eq_string_bitmap() {
825 let mut store = ColumnStore::with_schema(&[("category", ColumnType::String)]);
827
828 let tech_id = store.string_table_mut().intern("tech");
829 let science_id = store.string_table_mut().intern("science");
830
831 store.push_row(&[("category", ColumnValue::String(tech_id))]);
832 store.push_row(&[("category", ColumnValue::String(science_id))]);
833 store.push_row(&[("category", ColumnValue::String(tech_id))]);
834
835 let bitmap = store.filter_eq_string_bitmap("category", "tech");
837
838 assert!(bitmap.contains(0));
840 assert!(!bitmap.contains(1));
841 assert!(bitmap.contains(2));
842 assert_eq!(bitmap.len(), 2);
843 }
844
845 #[test]
846 fn test_filter_range_int_bitmap() {
847 let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
849 store.push_row(&[("price", ColumnValue::Int(50))]);
850 store.push_row(&[("price", ColumnValue::Int(100))]);
851 store.push_row(&[("price", ColumnValue::Int(150))]);
852 store.push_row(&[("price", ColumnValue::Int(200))]);
853
854 let bitmap = store.filter_range_int_bitmap("price", 75, 175);
856
857 assert!(!bitmap.contains(0));
859 assert!(bitmap.contains(1));
860 assert!(bitmap.contains(2));
861 assert!(!bitmap.contains(3));
862 assert_eq!(bitmap.len(), 2);
863 }
864
865 #[test]
866 fn test_bitmap_and() {
867 let mut store = ColumnStore::with_schema(&[
869 ("price", ColumnType::Int),
870 ("category", ColumnType::String),
871 ]);
872
873 let tech_id = store.string_table_mut().intern("tech");
874 let science_id = store.string_table_mut().intern("science");
875
876 store.push_row(&[
877 ("price", ColumnValue::Int(100)),
878 ("category", ColumnValue::String(tech_id)),
879 ]);
880 store.push_row(&[
881 ("price", ColumnValue::Int(200)),
882 ("category", ColumnValue::String(tech_id)),
883 ]);
884 store.push_row(&[
885 ("price", ColumnValue::Int(100)),
886 ("category", ColumnValue::String(science_id)),
887 ]);
888
889 let price_bitmap = store.filter_eq_int_bitmap("price", 100);
891 let category_bitmap = store.filter_eq_string_bitmap("category", "tech");
892 let combined = ColumnStore::bitmap_and(&price_bitmap, &category_bitmap);
893
894 assert!(combined.contains(0));
896 assert!(!combined.contains(1));
897 assert!(!combined.contains(2));
898 assert_eq!(combined.len(), 1);
899 }
900
901 #[test]
902 fn test_bitmap_or() {
903 let mut store = ColumnStore::with_schema(&[
905 ("price", ColumnType::Int),
906 ("category", ColumnType::String),
907 ]);
908
909 let tech_id = store.string_table_mut().intern("tech");
910 let science_id = store.string_table_mut().intern("science");
911
912 store.push_row(&[
913 ("price", ColumnValue::Int(100)),
914 ("category", ColumnValue::String(tech_id)),
915 ]);
916 store.push_row(&[
917 ("price", ColumnValue::Int(200)),
918 ("category", ColumnValue::String(science_id)),
919 ]);
920 store.push_row(&[
921 ("price", ColumnValue::Int(300)),
922 ("category", ColumnValue::String(science_id)),
923 ]);
924
925 let price_bitmap = store.filter_eq_int_bitmap("price", 100);
927 let category_bitmap = store.filter_eq_string_bitmap("category", "science");
928 let combined = ColumnStore::bitmap_or(&price_bitmap, &category_bitmap);
929
930 assert!(combined.contains(0));
932 assert!(combined.contains(1));
933 assert!(combined.contains(2));
934 assert_eq!(combined.len(), 3);
935 }
936
937 #[test]
938 fn test_filter_bitmap_missing_column() {
939 let store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
941
942 let bitmap = store.filter_eq_int_bitmap("missing", 100);
944
945 assert!(bitmap.is_empty());
947 }
948
949 #[test]
950 fn test_filter_bitmap_missing_string_value() {
951 let mut store = ColumnStore::with_schema(&[("category", ColumnType::String)]);
953 let tech_id = store.string_table_mut().intern("tech");
954 store.push_row(&[("category", ColumnValue::String(tech_id))]);
955
956 let bitmap = store.filter_eq_string_bitmap("category", "nonexistent");
958
959 assert!(bitmap.is_empty());
961 }
962
963 #[test]
964 fn test_count_eq_string_missing_value() {
965 let mut store = ColumnStore::with_schema(&[("category", ColumnType::String)]);
967 let tech_id = store.string_table_mut().intern("tech");
968 store.push_row(&[("category", ColumnValue::String(tech_id))]);
969
970 let count = store.count_eq_string("category", "nonexistent");
972
973 assert_eq!(count, 0);
975 }
976
977 #[test]
978 fn test_add_column() {
979 let mut store = ColumnStore::new();
981
982 store.add_column("price", ColumnType::Int);
984 store.add_column("rating", ColumnType::Float);
985
986 assert!(store.get_column("price").is_some());
988 assert!(store.get_column("rating").is_some());
989 }
990}