1use roaring::RoaringBitmap;
24use rustc_hash::FxHashMap;
25use std::collections::HashMap;
26
27#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
29pub struct StringId(u32);
30
31#[derive(Debug, Default)]
33pub struct StringTable {
34 string_to_id: FxHashMap<String, StringId>,
36 id_to_string: Vec<String>,
38}
39
40impl StringTable {
41 #[must_use]
43 pub fn new() -> Self {
44 Self::default()
45 }
46
47 pub fn intern(&mut self, s: &str) -> StringId {
51 if let Some(&id) = self.string_to_id.get(s) {
52 return id;
53 }
54
55 #[allow(clippy::cast_possible_truncation)]
56 let id = StringId(self.id_to_string.len() as u32);
57 self.id_to_string.push(s.to_string());
58 self.string_to_id.insert(s.to_string(), id);
59 id
60 }
61
62 #[must_use]
64 pub fn get(&self, id: StringId) -> Option<&str> {
65 self.id_to_string.get(id.0 as usize).map(String::as_str)
66 }
67
68 #[must_use]
70 pub fn get_id(&self, s: &str) -> Option<StringId> {
71 self.string_to_id.get(s).copied()
72 }
73
74 #[must_use]
76 pub fn len(&self) -> usize {
77 self.id_to_string.len()
78 }
79
80 #[must_use]
82 pub fn is_empty(&self) -> bool {
83 self.id_to_string.is_empty()
84 }
85}
86
87#[derive(Debug)]
89pub enum TypedColumn {
90 Int(Vec<Option<i64>>),
92 Float(Vec<Option<f64>>),
94 String(Vec<Option<StringId>>),
96 Bool(Vec<Option<bool>>),
98}
99
100impl TypedColumn {
101 #[must_use]
103 pub fn new_int(capacity: usize) -> Self {
104 Self::Int(Vec::with_capacity(capacity))
105 }
106
107 #[must_use]
109 pub fn new_float(capacity: usize) -> Self {
110 Self::Float(Vec::with_capacity(capacity))
111 }
112
113 #[must_use]
115 pub fn new_string(capacity: usize) -> Self {
116 Self::String(Vec::with_capacity(capacity))
117 }
118
119 #[must_use]
121 pub fn new_bool(capacity: usize) -> Self {
122 Self::Bool(Vec::with_capacity(capacity))
123 }
124
125 #[must_use]
127 pub fn len(&self) -> usize {
128 match self {
129 Self::Int(v) => v.len(),
130 Self::Float(v) => v.len(),
131 Self::String(v) => v.len(),
132 Self::Bool(v) => v.len(),
133 }
134 }
135
136 #[must_use]
138 pub fn is_empty(&self) -> bool {
139 self.len() == 0
140 }
141
142 pub fn push_null(&mut self) {
144 match self {
145 Self::Int(v) => v.push(None),
146 Self::Float(v) => v.push(None),
147 Self::String(v) => v.push(None),
148 Self::Bool(v) => v.push(None),
149 }
150 }
151}
152
153#[derive(Debug, Default)]
155pub struct ColumnStore {
156 columns: HashMap<String, TypedColumn>,
158 string_table: StringTable,
160 row_count: usize,
162}
163
164impl ColumnStore {
165 #[must_use]
167 pub fn new() -> Self {
168 Self::default()
169 }
170
171 #[must_use]
177 pub fn with_schema(fields: &[(&str, ColumnType)]) -> Self {
178 let mut store = Self::new();
179 for (name, col_type) in fields {
180 store.add_column(name, *col_type);
181 }
182 store
183 }
184
185 pub fn add_column(&mut self, name: &str, col_type: ColumnType) {
187 let column = match col_type {
188 ColumnType::Int => TypedColumn::new_int(0),
189 ColumnType::Float => TypedColumn::new_float(0),
190 ColumnType::String => TypedColumn::new_string(0),
191 ColumnType::Bool => TypedColumn::new_bool(0),
192 };
193 self.columns.insert(name.to_string(), column);
194 }
195
196 #[must_use]
198 pub fn row_count(&self) -> usize {
199 self.row_count
200 }
201
202 #[must_use]
204 pub fn string_table(&self) -> &StringTable {
205 &self.string_table
206 }
207
208 pub fn string_table_mut(&mut self) -> &mut StringTable {
210 &mut self.string_table
211 }
212
213 pub fn push_row(&mut self, values: &[(&str, ColumnValue)]) {
217 let value_map: FxHashMap<&str, &ColumnValue> =
219 values.iter().map(|(k, v)| (*k, v)).collect();
220
221 for (name, column) in &mut self.columns {
223 if let Some(value) = value_map.get(name.as_str()) {
224 match value {
225 ColumnValue::Null => column.push_null(),
226 ColumnValue::Int(v) => {
227 if let TypedColumn::Int(col) = column {
228 col.push(Some(*v));
229 } else {
230 column.push_null();
231 }
232 }
233 ColumnValue::Float(v) => {
234 if let TypedColumn::Float(col) = column {
235 col.push(Some(*v));
236 } else {
237 column.push_null();
238 }
239 }
240 ColumnValue::String(id) => {
241 if let TypedColumn::String(col) = column {
242 col.push(Some(*id));
243 } else {
244 column.push_null();
245 }
246 }
247 ColumnValue::Bool(v) => {
248 if let TypedColumn::Bool(col) = column {
249 col.push(Some(*v));
250 } else {
251 column.push_null();
252 }
253 }
254 }
255 } else {
256 column.push_null();
257 }
258 }
259
260 self.row_count += 1;
261 }
262
263 #[must_use]
265 pub fn get_column(&self, name: &str) -> Option<&TypedColumn> {
266 self.columns.get(name)
267 }
268
269 #[must_use]
273 pub fn filter_eq_int(&self, column: &str, value: i64) -> Vec<usize> {
274 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
275 return Vec::new();
276 };
277
278 col.iter()
279 .enumerate()
280 .filter_map(|(idx, v)| if *v == Some(value) { Some(idx) } else { None })
281 .collect()
282 }
283
284 #[must_use]
288 pub fn filter_eq_string(&self, column: &str, value: &str) -> Vec<usize> {
289 let Some(TypedColumn::String(col)) = self.columns.get(column) else {
290 return Vec::new();
291 };
292
293 let Some(string_id) = self.string_table.get_id(value) else {
294 return Vec::new(); };
296
297 col.iter()
298 .enumerate()
299 .filter_map(|(idx, v)| {
300 if *v == Some(string_id) {
301 Some(idx)
302 } else {
303 None
304 }
305 })
306 .collect()
307 }
308
309 #[must_use]
313 pub fn filter_gt_int(&self, column: &str, threshold: i64) -> Vec<usize> {
314 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
315 return Vec::new();
316 };
317
318 col.iter()
319 .enumerate()
320 .filter_map(|(idx, v)| match v {
321 Some(val) if *val > threshold => Some(idx),
322 _ => None,
323 })
324 .collect()
325 }
326
327 #[must_use]
329 pub fn filter_lt_int(&self, column: &str, threshold: i64) -> Vec<usize> {
330 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
331 return Vec::new();
332 };
333
334 col.iter()
335 .enumerate()
336 .filter_map(|(idx, v)| match v {
337 Some(val) if *val < threshold => Some(idx),
338 _ => None,
339 })
340 .collect()
341 }
342
343 #[must_use]
345 pub fn filter_range_int(&self, column: &str, low: i64, high: i64) -> Vec<usize> {
346 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
347 return Vec::new();
348 };
349
350 col.iter()
351 .enumerate()
352 .filter_map(|(idx, v)| match v {
353 Some(val) if *val > low && *val < high => Some(idx),
354 _ => None,
355 })
356 .collect()
357 }
358
359 #[must_use]
363 pub fn filter_in_string(&self, column: &str, values: &[&str]) -> Vec<usize> {
364 let Some(TypedColumn::String(col)) = self.columns.get(column) else {
365 return Vec::new();
366 };
367
368 let ids: Vec<StringId> = values
370 .iter()
371 .filter_map(|s| self.string_table.get_id(s))
372 .collect();
373
374 if ids.is_empty() {
375 return Vec::new();
376 }
377
378 col.iter()
379 .enumerate()
380 .filter_map(|(idx, v)| match v {
381 Some(id) if ids.contains(id) => Some(idx),
382 _ => None,
383 })
384 .collect()
385 }
386
387 #[must_use]
391 pub fn count_eq_int(&self, column: &str, value: i64) -> usize {
392 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
393 return 0;
394 };
395
396 col.iter().filter(|v| **v == Some(value)).count()
397 }
398
399 #[must_use]
401 pub fn count_eq_string(&self, column: &str, value: &str) -> usize {
402 let Some(TypedColumn::String(col)) = self.columns.get(column) else {
403 return 0;
404 };
405
406 let Some(string_id) = self.string_table.get_id(value) else {
407 return 0;
408 };
409
410 col.iter().filter(|v| **v == Some(string_id)).count()
411 }
412
413 #[must_use]
422 #[allow(clippy::cast_possible_truncation)]
423 pub fn filter_eq_int_bitmap(&self, column: &str, value: i64) -> RoaringBitmap {
424 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
425 return RoaringBitmap::new();
426 };
427
428 col.iter()
429 .enumerate()
430 .filter_map(|(idx, v)| {
431 if *v == Some(value) {
432 Some(idx as u32)
433 } else {
434 None
435 }
436 })
437 .collect()
438 }
439
440 #[must_use]
442 #[allow(clippy::cast_possible_truncation)]
443 pub fn filter_eq_string_bitmap(&self, column: &str, value: &str) -> RoaringBitmap {
444 let Some(TypedColumn::String(col)) = self.columns.get(column) else {
445 return RoaringBitmap::new();
446 };
447
448 let Some(string_id) = self.string_table.get_id(value) else {
449 return RoaringBitmap::new();
450 };
451
452 col.iter()
453 .enumerate()
454 .filter_map(|(idx, v)| {
455 if *v == Some(string_id) {
456 Some(idx as u32)
457 } else {
458 None
459 }
460 })
461 .collect()
462 }
463
464 #[must_use]
466 #[allow(clippy::cast_possible_truncation)]
467 pub fn filter_range_int_bitmap(&self, column: &str, low: i64, high: i64) -> RoaringBitmap {
468 let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
469 return RoaringBitmap::new();
470 };
471
472 col.iter()
473 .enumerate()
474 .filter_map(|(idx, v)| match v {
475 Some(val) if *val > low && *val < high => Some(idx as u32),
476 _ => None,
477 })
478 .collect()
479 }
480
481 #[must_use]
485 pub fn bitmap_and(a: &RoaringBitmap, b: &RoaringBitmap) -> RoaringBitmap {
486 a & b
487 }
488
489 #[must_use]
493 pub fn bitmap_or(a: &RoaringBitmap, b: &RoaringBitmap) -> RoaringBitmap {
494 a | b
495 }
496}
497
498#[derive(Debug, Clone, Copy, PartialEq, Eq)]
500pub enum ColumnType {
501 Int,
503 Float,
505 String,
507 Bool,
509}
510
511#[derive(Debug, Clone)]
513pub enum ColumnValue {
514 Int(i64),
516 Float(f64),
518 String(StringId),
520 Bool(bool),
522 Null,
524}
525
526#[cfg(test)]
527mod tests {
528 use super::*;
529
530 #[test]
535 fn test_string_table_intern() {
536 let mut table = StringTable::new();
538
539 let id1 = table.intern("hello");
541 let id2 = table.intern("world");
542 let id3 = table.intern("hello"); assert_eq!(id1, id3);
546 assert_ne!(id1, id2);
547 assert_eq!(table.len(), 2);
548 }
549
550 #[test]
551 fn test_string_table_get() {
552 let mut table = StringTable::new();
554 let id = table.intern("test");
555
556 assert_eq!(table.get(id), Some("test"));
558 }
559
560 #[test]
561 fn test_string_table_get_id() {
562 let mut table = StringTable::new();
564 table.intern("existing");
565
566 assert!(table.get_id("existing").is_some());
568 assert!(table.get_id("missing").is_none());
569 }
570
571 #[test]
576 fn test_column_store_new() {
577 let store = ColumnStore::new();
579
580 assert_eq!(store.row_count(), 0);
582 }
583
584 #[test]
585 fn test_column_store_with_schema() {
586 let store = ColumnStore::with_schema(&[
588 ("category", ColumnType::String),
589 ("price", ColumnType::Int),
590 ]);
591
592 assert!(store.get_column("category").is_some());
594 assert!(store.get_column("price").is_some());
595 assert!(store.get_column("missing").is_none());
596 }
597
598 #[test]
599 fn test_column_store_push_row() {
600 let mut store = ColumnStore::with_schema(&[
602 ("category", ColumnType::String),
603 ("price", ColumnType::Int),
604 ]);
605
606 let cat_id = store.string_table_mut().intern("tech");
607
608 store.push_row(&[
610 ("category", ColumnValue::String(cat_id)),
611 ("price", ColumnValue::Int(100)),
612 ]);
613
614 assert_eq!(store.row_count(), 1);
616 }
617
618 #[test]
623 fn test_filter_eq_int() {
624 let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
626 store.push_row(&[("price", ColumnValue::Int(100))]);
627 store.push_row(&[("price", ColumnValue::Int(200))]);
628 store.push_row(&[("price", ColumnValue::Int(100))]);
629
630 let matches = store.filter_eq_int("price", 100);
632
633 assert_eq!(matches, vec![0, 2]);
635 }
636
637 #[test]
638 fn test_filter_eq_string() {
639 let mut store = ColumnStore::with_schema(&[("category", ColumnType::String)]);
641
642 let tech_id = store.string_table_mut().intern("tech");
643 let science_id = store.string_table_mut().intern("science");
644
645 store.push_row(&[("category", ColumnValue::String(tech_id))]);
646 store.push_row(&[("category", ColumnValue::String(science_id))]);
647 store.push_row(&[("category", ColumnValue::String(tech_id))]);
648
649 let matches = store.filter_eq_string("category", "tech");
651
652 assert_eq!(matches, vec![0, 2]);
654 }
655
656 #[test]
657 fn test_filter_gt_int() {
658 let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
660 store.push_row(&[("price", ColumnValue::Int(50))]);
661 store.push_row(&[("price", ColumnValue::Int(100))]);
662 store.push_row(&[("price", ColumnValue::Int(150))]);
663
664 let matches = store.filter_gt_int("price", 75);
666
667 assert_eq!(matches, vec![1, 2]);
669 }
670
671 #[test]
672 fn test_filter_lt_int() {
673 let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
675 store.push_row(&[("price", ColumnValue::Int(50))]);
676 store.push_row(&[("price", ColumnValue::Int(100))]);
677 store.push_row(&[("price", ColumnValue::Int(150))]);
678
679 let matches = store.filter_lt_int("price", 100);
681
682 assert_eq!(matches, vec![0]);
684 }
685
686 #[test]
687 fn test_filter_range_int() {
688 let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
690 store.push_row(&[("price", ColumnValue::Int(50))]);
691 store.push_row(&[("price", ColumnValue::Int(100))]);
692 store.push_row(&[("price", ColumnValue::Int(150))]);
693 store.push_row(&[("price", ColumnValue::Int(200))]);
694
695 let matches = store.filter_range_int("price", 75, 175);
697
698 assert_eq!(matches, vec![1, 2]);
700 }
701
702 #[test]
703 fn test_filter_in_string() {
704 let mut store = ColumnStore::with_schema(&[("category", ColumnType::String)]);
706
707 let tech_id = store.string_table_mut().intern("tech");
708 let science_id = store.string_table_mut().intern("science");
709 let art_id = store.string_table_mut().intern("art");
710
711 store.push_row(&[("category", ColumnValue::String(tech_id))]);
712 store.push_row(&[("category", ColumnValue::String(science_id))]);
713 store.push_row(&[("category", ColumnValue::String(art_id))]);
714 store.push_row(&[("category", ColumnValue::String(tech_id))]);
715
716 let matches = store.filter_in_string("category", &["tech", "art"]);
718
719 assert_eq!(matches, vec![0, 2, 3]);
721 }
722
723 #[test]
724 fn test_filter_with_null_values() {
725 let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
727 store.push_row(&[("price", ColumnValue::Int(100))]);
728 store.push_row(&[("price", ColumnValue::Null)]);
729 store.push_row(&[("price", ColumnValue::Int(100))]);
730
731 let matches = store.filter_eq_int("price", 100);
733
734 assert_eq!(matches, vec![0, 2]);
736 }
737
738 #[test]
739 fn test_filter_missing_column() {
740 let store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
742
743 let matches = store.filter_eq_int("missing", 100);
745
746 assert!(matches.is_empty());
748 }
749
750 #[test]
755 fn test_count_eq_int() {
756 let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
758 store.push_row(&[("price", ColumnValue::Int(100))]);
759 store.push_row(&[("price", ColumnValue::Int(200))]);
760 store.push_row(&[("price", ColumnValue::Int(100))]);
761
762 let count = store.count_eq_int("price", 100);
764
765 assert_eq!(count, 2);
767 }
768
769 #[test]
770 fn test_count_eq_string() {
771 let mut store = ColumnStore::with_schema(&[("category", ColumnType::String)]);
773
774 let tech_id = store.string_table_mut().intern("tech");
775 let science_id = store.string_table_mut().intern("science");
776
777 store.push_row(&[("category", ColumnValue::String(tech_id))]);
778 store.push_row(&[("category", ColumnValue::String(science_id))]);
779 store.push_row(&[("category", ColumnValue::String(tech_id))]);
780
781 let count = store.count_eq_string("category", "tech");
783
784 assert_eq!(count, 2);
786 }
787
788 #[test]
793 fn test_filter_eq_int_bitmap() {
794 let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
796 store.push_row(&[("price", ColumnValue::Int(100))]);
797 store.push_row(&[("price", ColumnValue::Int(200))]);
798 store.push_row(&[("price", ColumnValue::Int(100))]);
799
800 let bitmap = store.filter_eq_int_bitmap("price", 100);
802
803 assert!(bitmap.contains(0));
805 assert!(!bitmap.contains(1));
806 assert!(bitmap.contains(2));
807 assert_eq!(bitmap.len(), 2);
808 }
809
810 #[test]
811 fn test_filter_eq_string_bitmap() {
812 let mut store = ColumnStore::with_schema(&[("category", ColumnType::String)]);
814
815 let tech_id = store.string_table_mut().intern("tech");
816 let science_id = store.string_table_mut().intern("science");
817
818 store.push_row(&[("category", ColumnValue::String(tech_id))]);
819 store.push_row(&[("category", ColumnValue::String(science_id))]);
820 store.push_row(&[("category", ColumnValue::String(tech_id))]);
821
822 let bitmap = store.filter_eq_string_bitmap("category", "tech");
824
825 assert!(bitmap.contains(0));
827 assert!(!bitmap.contains(1));
828 assert!(bitmap.contains(2));
829 assert_eq!(bitmap.len(), 2);
830 }
831
832 #[test]
833 fn test_filter_range_int_bitmap() {
834 let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
836 store.push_row(&[("price", ColumnValue::Int(50))]);
837 store.push_row(&[("price", ColumnValue::Int(100))]);
838 store.push_row(&[("price", ColumnValue::Int(150))]);
839 store.push_row(&[("price", ColumnValue::Int(200))]);
840
841 let bitmap = store.filter_range_int_bitmap("price", 75, 175);
843
844 assert!(!bitmap.contains(0));
846 assert!(bitmap.contains(1));
847 assert!(bitmap.contains(2));
848 assert!(!bitmap.contains(3));
849 assert_eq!(bitmap.len(), 2);
850 }
851
852 #[test]
853 fn test_bitmap_and() {
854 let mut store = ColumnStore::with_schema(&[
856 ("price", ColumnType::Int),
857 ("category", ColumnType::String),
858 ]);
859
860 let tech_id = store.string_table_mut().intern("tech");
861 let science_id = store.string_table_mut().intern("science");
862
863 store.push_row(&[
864 ("price", ColumnValue::Int(100)),
865 ("category", ColumnValue::String(tech_id)),
866 ]);
867 store.push_row(&[
868 ("price", ColumnValue::Int(200)),
869 ("category", ColumnValue::String(tech_id)),
870 ]);
871 store.push_row(&[
872 ("price", ColumnValue::Int(100)),
873 ("category", ColumnValue::String(science_id)),
874 ]);
875
876 let price_bitmap = store.filter_eq_int_bitmap("price", 100);
878 let category_bitmap = store.filter_eq_string_bitmap("category", "tech");
879 let combined = ColumnStore::bitmap_and(&price_bitmap, &category_bitmap);
880
881 assert!(combined.contains(0));
883 assert!(!combined.contains(1));
884 assert!(!combined.contains(2));
885 assert_eq!(combined.len(), 1);
886 }
887
888 #[test]
889 fn test_bitmap_or() {
890 let mut store = ColumnStore::with_schema(&[
892 ("price", ColumnType::Int),
893 ("category", ColumnType::String),
894 ]);
895
896 let tech_id = store.string_table_mut().intern("tech");
897 let science_id = store.string_table_mut().intern("science");
898
899 store.push_row(&[
900 ("price", ColumnValue::Int(100)),
901 ("category", ColumnValue::String(tech_id)),
902 ]);
903 store.push_row(&[
904 ("price", ColumnValue::Int(200)),
905 ("category", ColumnValue::String(science_id)),
906 ]);
907 store.push_row(&[
908 ("price", ColumnValue::Int(300)),
909 ("category", ColumnValue::String(science_id)),
910 ]);
911
912 let price_bitmap = store.filter_eq_int_bitmap("price", 100);
914 let category_bitmap = store.filter_eq_string_bitmap("category", "science");
915 let combined = ColumnStore::bitmap_or(&price_bitmap, &category_bitmap);
916
917 assert!(combined.contains(0));
919 assert!(combined.contains(1));
920 assert!(combined.contains(2));
921 assert_eq!(combined.len(), 3);
922 }
923
924 #[test]
925 fn test_filter_bitmap_missing_column() {
926 let store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
928
929 let bitmap = store.filter_eq_int_bitmap("missing", 100);
931
932 assert!(bitmap.is_empty());
934 }
935
936 #[test]
937 fn test_filter_bitmap_missing_string_value() {
938 let mut store = ColumnStore::with_schema(&[("category", ColumnType::String)]);
940 let tech_id = store.string_table_mut().intern("tech");
941 store.push_row(&[("category", ColumnValue::String(tech_id))]);
942
943 let bitmap = store.filter_eq_string_bitmap("category", "nonexistent");
945
946 assert!(bitmap.is_empty());
948 }
949
950 #[test]
951 fn test_count_eq_string_missing_value() {
952 let mut store = ColumnStore::with_schema(&[("category", ColumnType::String)]);
954 let tech_id = store.string_table_mut().intern("tech");
955 store.push_row(&[("category", ColumnValue::String(tech_id))]);
956
957 let count = store.count_eq_string("category", "nonexistent");
959
960 assert_eq!(count, 0);
962 }
963
964 #[test]
965 fn test_add_column() {
966 let mut store = ColumnStore::new();
968
969 store.add_column("price", ColumnType::Int);
971 store.add_column("rating", ColumnType::Float);
972
973 assert!(store.get_column("price").is_some());
975 assert!(store.get_column("rating").is_some());
976 }
977}