velesdb_core/
column_store.rs

1//! Column-oriented storage for high-performance metadata filtering.
2//!
3//! This module provides a columnar storage format for frequently filtered fields,
4//! avoiding the overhead of JSON parsing during filter operations.
5//!
6//! # Performance Goals
7//!
8//! - Maintain 50M+ items/sec throughput at 100k items (vs 19M/s with JSON)
9//! - Cache-friendly sequential memory access
10//! - Support for common filter operations: Eq, Gt, Lt, In, Range
11//!
12//! # Architecture
13//!
14//! ```text
15//! ColumnStore
16//! ├── columns: HashMap<field_name, TypedColumn>
//! │   ├── "category" -> TypedColumn::String(Vec<Option<StringId>>)
//! │   ├── "price"    -> TypedColumn::Int(Vec<Option<i64>>)
//! │   └── "rating"   -> TypedColumn::Float(Vec<Option<f64>>)
20//! └── string_table: StringTable (interning for strings)
21//! ```
22
23use roaring::RoaringBitmap;
24use rustc_hash::FxHashMap;
25use std::collections::HashMap;
26
/// Interned string ID for fast equality comparisons.
///
/// Wraps the `u32` handed out by `StringTable::intern`. Two IDs obtained from
/// the same table compare equal exactly when they were produced for the same
/// string, so equality is a single integer comparison.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct StringId(u32);
30
/// String interning table for fast string comparisons.
///
/// Each distinct string is stored once and identified by a [`StringId`], so
/// column data can hold and compare small integer IDs instead of string bytes.
#[derive(Debug, Default)]
pub struct StringTable {
    /// String to ID mapping, consulted by `intern` and `get_id`.
    string_to_id: FxHashMap<String, StringId>,
    /// ID to string mapping (for retrieval); the integer inside a `StringId`
    /// is the index of its string in this vector.
    id_to_string: Vec<String>,
}
39
40impl StringTable {
41    /// Creates a new empty string table.
42    #[must_use]
43    pub fn new() -> Self {
44        Self::default()
45    }
46
47    /// Interns a string, returning its ID.
48    ///
49    /// If the string already exists, returns the existing ID.
50    pub fn intern(&mut self, s: &str) -> StringId {
51        if let Some(&id) = self.string_to_id.get(s) {
52            return id;
53        }
54
55        #[allow(clippy::cast_possible_truncation)]
56        let id = StringId(self.id_to_string.len() as u32);
57        self.id_to_string.push(s.to_string());
58        self.string_to_id.insert(s.to_string(), id);
59        id
60    }
61
62    /// Gets the string for an ID.
63    #[must_use]
64    pub fn get(&self, id: StringId) -> Option<&str> {
65        self.id_to_string.get(id.0 as usize).map(String::as_str)
66    }
67
68    /// Gets the ID for a string without interning.
69    #[must_use]
70    pub fn get_id(&self, s: &str) -> Option<StringId> {
71        self.string_to_id.get(s).copied()
72    }
73
74    /// Returns the number of interned strings.
75    #[must_use]
76    pub fn len(&self) -> usize {
77        self.id_to_string.len()
78    }
79
80    /// Returns true if the table is empty.
81    #[must_use]
82    pub fn is_empty(&self) -> bool {
83        self.id_to_string.is_empty()
84    }
85}
86
/// A typed column storing values of a specific type.
///
/// Every variant holds one `Option` per row; `None` represents a null cell
/// (this is what `push_null` appends).
#[derive(Debug)]
pub enum TypedColumn {
    /// Integer column (i64)
    Int(Vec<Option<i64>>),
    /// Float column (f64)
    Float(Vec<Option<f64>>),
    /// String column (interned IDs)
    String(Vec<Option<StringId>>),
    /// Boolean column
    Bool(Vec<Option<bool>>),
}
99
100impl TypedColumn {
101    /// Creates a new integer column with the given capacity.
102    #[must_use]
103    pub fn new_int(capacity: usize) -> Self {
104        Self::Int(Vec::with_capacity(capacity))
105    }
106
107    /// Creates a new float column with the given capacity.
108    #[must_use]
109    pub fn new_float(capacity: usize) -> Self {
110        Self::Float(Vec::with_capacity(capacity))
111    }
112
113    /// Creates a new string column with the given capacity.
114    #[must_use]
115    pub fn new_string(capacity: usize) -> Self {
116        Self::String(Vec::with_capacity(capacity))
117    }
118
119    /// Creates a new boolean column with the given capacity.
120    #[must_use]
121    pub fn new_bool(capacity: usize) -> Self {
122        Self::Bool(Vec::with_capacity(capacity))
123    }
124
125    /// Returns the number of values in the column.
126    #[must_use]
127    pub fn len(&self) -> usize {
128        match self {
129            Self::Int(v) => v.len(),
130            Self::Float(v) => v.len(),
131            Self::String(v) => v.len(),
132            Self::Bool(v) => v.len(),
133        }
134    }
135
136    /// Returns true if the column is empty.
137    #[must_use]
138    pub fn is_empty(&self) -> bool {
139        self.len() == 0
140    }
141
142    /// Pushes a null value to the column.
143    pub fn push_null(&mut self) {
144        match self {
145            Self::Int(v) => v.push(None),
146            Self::Float(v) => v.push(None),
147            Self::String(v) => v.push(None),
148            Self::Bool(v) => v.push(None),
149        }
150    }
151}
152
/// Column store for high-performance filtering.
///
/// Holds one [`TypedColumn`] per indexed field plus the [`StringTable`] used
/// to translate string filter values into the IDs stored in string columns.
#[derive(Debug, Default)]
pub struct ColumnStore {
    /// Columns indexed by field name
    columns: HashMap<String, TypedColumn>,
    /// String interning table
    string_table: StringTable,
    /// Number of rows; incremented once per `push_row` call
    row_count: usize,
}
163
164impl ColumnStore {
165    /// Creates a new empty column store.
166    #[must_use]
167    pub fn new() -> Self {
168        Self::default()
169    }
170
171    /// Creates a column store with pre-defined indexed fields.
172    ///
173    /// # Arguments
174    ///
175    /// * `fields` - List of (`field_name`, `field_type`) tuples
176    #[must_use]
177    pub fn with_schema(fields: &[(&str, ColumnType)]) -> Self {
178        let mut store = Self::new();
179        for (name, col_type) in fields {
180            store.add_column(name, *col_type);
181        }
182        store
183    }
184
185    /// Adds a new column to the store.
186    pub fn add_column(&mut self, name: &str, col_type: ColumnType) {
187        let column = match col_type {
188            ColumnType::Int => TypedColumn::new_int(0),
189            ColumnType::Float => TypedColumn::new_float(0),
190            ColumnType::String => TypedColumn::new_string(0),
191            ColumnType::Bool => TypedColumn::new_bool(0),
192        };
193        self.columns.insert(name.to_string(), column);
194    }
195
196    /// Returns the number of rows in the store.
197    #[must_use]
198    pub fn row_count(&self) -> usize {
199        self.row_count
200    }
201
202    /// Returns the string table for string interning.
203    #[must_use]
204    pub fn string_table(&self) -> &StringTable {
205        &self.string_table
206    }
207
208    /// Returns a mutable reference to the string table.
209    pub fn string_table_mut(&mut self) -> &mut StringTable {
210        &mut self.string_table
211    }
212
213    /// Pushes values for a new row.
214    ///
215    /// Missing fields will be set to null.
216    pub fn push_row(&mut self, values: &[(&str, ColumnValue)]) {
217        // Build a map of provided values
218        let value_map: FxHashMap<&str, &ColumnValue> =
219            values.iter().map(|(k, v)| (*k, v)).collect();
220
221        // Update each column
222        for (name, column) in &mut self.columns {
223            if let Some(value) = value_map.get(name.as_str()) {
224                match value {
225                    ColumnValue::Null => column.push_null(),
226                    ColumnValue::Int(v) => {
227                        if let TypedColumn::Int(col) = column {
228                            col.push(Some(*v));
229                        } else {
230                            column.push_null();
231                        }
232                    }
233                    ColumnValue::Float(v) => {
234                        if let TypedColumn::Float(col) = column {
235                            col.push(Some(*v));
236                        } else {
237                            column.push_null();
238                        }
239                    }
240                    ColumnValue::String(id) => {
241                        if let TypedColumn::String(col) = column {
242                            col.push(Some(*id));
243                        } else {
244                            column.push_null();
245                        }
246                    }
247                    ColumnValue::Bool(v) => {
248                        if let TypedColumn::Bool(col) = column {
249                            col.push(Some(*v));
250                        } else {
251                            column.push_null();
252                        }
253                    }
254                }
255            } else {
256                column.push_null();
257            }
258        }
259
260        self.row_count += 1;
261    }
262
263    /// Gets a column by name.
264    #[must_use]
265    pub fn get_column(&self, name: &str) -> Option<&TypedColumn> {
266        self.columns.get(name)
267    }
268
269    /// Filters rows by equality on an integer column.
270    ///
271    /// Returns a vector of row indices that match.
272    #[must_use]
273    pub fn filter_eq_int(&self, column: &str, value: i64) -> Vec<usize> {
274        let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
275            return Vec::new();
276        };
277
278        col.iter()
279            .enumerate()
280            .filter_map(|(idx, v)| if *v == Some(value) { Some(idx) } else { None })
281            .collect()
282    }
283
284    /// Filters rows by equality on a string column.
285    ///
286    /// Returns a vector of row indices that match.
287    #[must_use]
288    pub fn filter_eq_string(&self, column: &str, value: &str) -> Vec<usize> {
289        let Some(TypedColumn::String(col)) = self.columns.get(column) else {
290            return Vec::new();
291        };
292
293        let Some(string_id) = self.string_table.get_id(value) else {
294            return Vec::new(); // String not in table, no matches
295        };
296
297        col.iter()
298            .enumerate()
299            .filter_map(|(idx, v)| {
300                if *v == Some(string_id) {
301                    Some(idx)
302                } else {
303                    None
304                }
305            })
306            .collect()
307    }
308
309    /// Filters rows by range on an integer column (value > threshold).
310    ///
311    /// Returns a vector of row indices that match.
312    #[must_use]
313    pub fn filter_gt_int(&self, column: &str, threshold: i64) -> Vec<usize> {
314        let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
315            return Vec::new();
316        };
317
318        col.iter()
319            .enumerate()
320            .filter_map(|(idx, v)| match v {
321                Some(val) if *val > threshold => Some(idx),
322                _ => None,
323            })
324            .collect()
325    }
326
327    /// Filters rows by range on an integer column (value < threshold).
328    #[must_use]
329    pub fn filter_lt_int(&self, column: &str, threshold: i64) -> Vec<usize> {
330        let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
331            return Vec::new();
332        };
333
334        col.iter()
335            .enumerate()
336            .filter_map(|(idx, v)| match v {
337                Some(val) if *val < threshold => Some(idx),
338                _ => None,
339            })
340            .collect()
341    }
342
343    /// Filters rows by range on an integer column (low < value < high).
344    #[must_use]
345    pub fn filter_range_int(&self, column: &str, low: i64, high: i64) -> Vec<usize> {
346        let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
347            return Vec::new();
348        };
349
350        col.iter()
351            .enumerate()
352            .filter_map(|(idx, v)| match v {
353                Some(val) if *val > low && *val < high => Some(idx),
354                _ => None,
355            })
356            .collect()
357    }
358
359    /// Filters rows by IN clause on a string column.
360    ///
361    /// Returns a vector of row indices that match any of the values.
362    #[must_use]
363    pub fn filter_in_string(&self, column: &str, values: &[&str]) -> Vec<usize> {
364        let Some(TypedColumn::String(col)) = self.columns.get(column) else {
365            return Vec::new();
366        };
367
368        // Convert string values to IDs
369        let ids: Vec<StringId> = values
370            .iter()
371            .filter_map(|s| self.string_table.get_id(s))
372            .collect();
373
374        if ids.is_empty() {
375            return Vec::new();
376        }
377
378        // Perf: Use HashSet only for large IN clauses (>16 values)
379        // Vec.contains() is faster for small arrays due to cache locality
380        if ids.len() > 16 {
381            let id_set: rustc_hash::FxHashSet<StringId> = ids.into_iter().collect();
382            col.iter()
383                .enumerate()
384                .filter_map(|(idx, v)| match v {
385                    Some(id) if id_set.contains(id) => Some(idx),
386                    _ => None,
387                })
388                .collect()
389        } else {
390            col.iter()
391                .enumerate()
392                .filter_map(|(idx, v)| match v {
393                    Some(id) if ids.contains(id) => Some(idx),
394                    _ => None,
395                })
396                .collect()
397        }
398    }
399
400    /// Counts rows matching equality on an integer column.
401    ///
402    /// More efficient than `filter_eq_int().len()` as it doesn't allocate.
403    #[must_use]
404    pub fn count_eq_int(&self, column: &str, value: i64) -> usize {
405        let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
406            return 0;
407        };
408
409        col.iter().filter(|v| **v == Some(value)).count()
410    }
411
412    /// Counts rows matching equality on a string column.
413    #[must_use]
414    pub fn count_eq_string(&self, column: &str, value: &str) -> usize {
415        let Some(TypedColumn::String(col)) = self.columns.get(column) else {
416            return 0;
417        };
418
419        let Some(string_id) = self.string_table.get_id(value) else {
420            return 0;
421        };
422
423        col.iter().filter(|v| **v == Some(string_id)).count()
424    }
425
426    // =========================================================================
427    // Optimized Bitmap-based Filtering (for 100k+ items)
428    // =========================================================================
429
430    /// Filters rows by equality on an integer column, returning a bitmap.
431    ///
432    /// Uses `RoaringBitmap` for memory-efficient storage of matching indices.
433    /// Useful for combining multiple filters with AND/OR operations.
434    #[must_use]
435    #[allow(clippy::cast_possible_truncation)]
436    pub fn filter_eq_int_bitmap(&self, column: &str, value: i64) -> RoaringBitmap {
437        let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
438            return RoaringBitmap::new();
439        };
440
441        col.iter()
442            .enumerate()
443            .filter_map(|(idx, v)| {
444                if *v == Some(value) {
445                    Some(idx as u32)
446                } else {
447                    None
448                }
449            })
450            .collect()
451    }
452
453    /// Filters rows by equality on a string column, returning a bitmap.
454    #[must_use]
455    #[allow(clippy::cast_possible_truncation)]
456    pub fn filter_eq_string_bitmap(&self, column: &str, value: &str) -> RoaringBitmap {
457        let Some(TypedColumn::String(col)) = self.columns.get(column) else {
458            return RoaringBitmap::new();
459        };
460
461        let Some(string_id) = self.string_table.get_id(value) else {
462            return RoaringBitmap::new();
463        };
464
465        col.iter()
466            .enumerate()
467            .filter_map(|(idx, v)| {
468                if *v == Some(string_id) {
469                    Some(idx as u32)
470                } else {
471                    None
472                }
473            })
474            .collect()
475    }
476
477    /// Filters rows by range on an integer column, returning a bitmap.
478    #[must_use]
479    #[allow(clippy::cast_possible_truncation)]
480    pub fn filter_range_int_bitmap(&self, column: &str, low: i64, high: i64) -> RoaringBitmap {
481        let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
482            return RoaringBitmap::new();
483        };
484
485        col.iter()
486            .enumerate()
487            .filter_map(|(idx, v)| match v {
488                Some(val) if *val > low && *val < high => Some(idx as u32),
489                _ => None,
490            })
491            .collect()
492    }
493
494    /// Combines two filter results using AND.
495    ///
496    /// Returns indices that are in both bitmaps.
497    #[must_use]
498    pub fn bitmap_and(a: &RoaringBitmap, b: &RoaringBitmap) -> RoaringBitmap {
499        a & b
500    }
501
502    /// Combines two filter results using OR.
503    ///
504    /// Returns indices that are in either bitmap.
505    #[must_use]
506    pub fn bitmap_or(a: &RoaringBitmap, b: &RoaringBitmap) -> RoaringBitmap {
507        a | b
508    }
509}
510
/// Column type for schema definition.
///
/// Passed to `ColumnStore::add_column` / `ColumnStore::with_schema` to select
/// which kind of typed column is created for a field.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ColumnType {
    /// 64-bit signed integer
    Int,
    /// 64-bit floating point
    Float,
    /// Interned string
    String,
    /// Boolean
    Bool,
}
523
/// A value that can be stored in a column.
///
/// When a row is pushed, a value whose variant does not match the target
/// column's type is stored as null rather than converted.
#[derive(Debug, Clone)]
pub enum ColumnValue {
    /// Integer value
    Int(i64),
    /// Float value
    Float(f64),
    /// String ID (must be interned first)
    String(StringId),
    /// Boolean value
    Bool(bool),
    /// Null value
    Null,
}
538
#[cfg(test)]
mod tests {
    use super::*;

    // =========================================================================
    // Fixtures
    // =========================================================================

    /// Builds a store with one integer `price` column, one row per entry.
    fn price_store(prices: &[i64]) -> ColumnStore {
        let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
        for &price in prices {
            store.push_row(&[("price", ColumnValue::Int(price))]);
        }
        store
    }

    /// Builds a store with one string `category` column, one row per label.
    fn category_store(labels: &[&str]) -> ColumnStore {
        let mut store = ColumnStore::with_schema(&[("category", ColumnType::String)]);
        for label in labels {
            let id = store.string_table_mut().intern(label);
            store.push_row(&[("category", ColumnValue::String(id))]);
        }
        store
    }

    /// Builds a store with `price` and `category` columns, one row per pair.
    fn price_category_store(rows: &[(i64, &str)]) -> ColumnStore {
        let mut store = ColumnStore::with_schema(&[
            ("price", ColumnType::Int),
            ("category", ColumnType::String),
        ]);
        for &(price, label) in rows {
            let id = store.string_table_mut().intern(label);
            store.push_row(&[
                ("price", ColumnValue::Int(price)),
                ("category", ColumnValue::String(id)),
            ]);
        }
        store
    }

    /// Asserts that, among rows `0..rows`, `bitmap` holds exactly `expected`.
    fn assert_rows(bitmap: &RoaringBitmap, rows: u32, expected: &[u32]) {
        for row in 0..rows {
            assert_eq!(
                bitmap.contains(row),
                expected.contains(&row),
                "unexpected membership for row {}",
                row
            );
        }
        assert_eq!(bitmap.len(), expected.len() as u64);
    }

    // =========================================================================
    // StringTable
    // =========================================================================

    #[test]
    fn test_string_table_intern() {
        let mut table = StringTable::new();

        let hello = table.intern("hello");
        let world = table.intern("world");
        let hello_again = table.intern("hello");

        assert_eq!(hello, hello_again);
        assert_ne!(hello, world);
        assert_eq!(table.len(), 2);
    }

    #[test]
    fn test_string_table_get() {
        let mut table = StringTable::new();
        let id = table.intern("test");

        assert_eq!(table.get(id), Some("test"));
    }

    #[test]
    fn test_string_table_get_id() {
        let mut table = StringTable::new();
        table.intern("existing");

        assert!(table.get_id("existing").is_some());
        assert!(table.get_id("missing").is_none());
    }

    // =========================================================================
    // ColumnStore - basic operations
    // =========================================================================

    #[test]
    fn test_column_store_new() {
        let store = ColumnStore::new();

        assert_eq!(store.row_count(), 0);
    }

    #[test]
    fn test_column_store_with_schema() {
        let store = ColumnStore::with_schema(&[
            ("category", ColumnType::String),
            ("price", ColumnType::Int),
        ]);

        assert!(store.get_column("category").is_some());
        assert!(store.get_column("price").is_some());
        assert!(store.get_column("missing").is_none());
    }

    #[test]
    fn test_column_store_push_row() {
        let mut store = ColumnStore::with_schema(&[
            ("category", ColumnType::String),
            ("price", ColumnType::Int),
        ]);
        let tech = store.string_table_mut().intern("tech");

        store.push_row(&[
            ("category", ColumnValue::String(tech)),
            ("price", ColumnValue::Int(100)),
        ]);

        assert_eq!(store.row_count(), 1);
    }

    #[test]
    fn test_add_column() {
        let mut store = ColumnStore::new();

        store.add_column("price", ColumnType::Int);
        store.add_column("rating", ColumnType::Float);

        assert!(store.get_column("price").is_some());
        assert!(store.get_column("rating").is_some());
    }

    // =========================================================================
    // ColumnStore - Vec-based filtering
    // =========================================================================

    #[test]
    fn test_filter_eq_int() {
        let store = price_store(&[100, 200, 100]);

        assert_eq!(store.filter_eq_int("price", 100), vec![0, 2]);
    }

    #[test]
    fn test_filter_eq_string() {
        let store = category_store(&["tech", "science", "tech"]);

        assert_eq!(store.filter_eq_string("category", "tech"), vec![0, 2]);
    }

    #[test]
    fn test_filter_gt_int() {
        let store = price_store(&[50, 100, 150]);

        assert_eq!(store.filter_gt_int("price", 75), vec![1, 2]);
    }

    #[test]
    fn test_filter_lt_int() {
        let store = price_store(&[50, 100, 150]);

        assert_eq!(store.filter_lt_int("price", 100), vec![0]);
    }

    #[test]
    fn test_filter_range_int() {
        let store = price_store(&[50, 100, 150, 200]);

        assert_eq!(store.filter_range_int("price", 75, 175), vec![1, 2]);
    }

    #[test]
    fn test_filter_in_string() {
        let store = category_store(&["tech", "science", "art", "tech"]);

        let matches = store.filter_in_string("category", &["tech", "art"]);

        assert_eq!(matches, vec![0, 2, 3]);
    }

    #[test]
    fn test_filter_with_null_values() {
        let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
        let cells = [
            ColumnValue::Int(100),
            ColumnValue::Null,
            ColumnValue::Int(100),
        ];
        for cell in cells.iter() {
            store.push_row(&[("price", cell.clone())]);
        }

        // Nulls must never match an equality filter.
        assert_eq!(store.filter_eq_int("price", 100), vec![0, 2]);
    }

    #[test]
    fn test_filter_missing_column() {
        let store = price_store(&[]);

        assert!(store.filter_eq_int("missing", 100).is_empty());
    }

    // =========================================================================
    // ColumnStore - count operations
    // =========================================================================

    #[test]
    fn test_count_eq_int() {
        let store = price_store(&[100, 200, 100]);

        assert_eq!(store.count_eq_int("price", 100), 2);
    }

    #[test]
    fn test_count_eq_string() {
        let store = category_store(&["tech", "science", "tech"]);

        assert_eq!(store.count_eq_string("category", "tech"), 2);
    }

    #[test]
    fn test_count_eq_string_missing_value() {
        let store = category_store(&["tech"]);

        // "nonexistent" was never interned, so nothing can match it.
        assert_eq!(store.count_eq_string("category", "nonexistent"), 0);
    }

    // =========================================================================
    // ColumnStore - bitmap operations
    // =========================================================================

    #[test]
    fn test_filter_eq_int_bitmap() {
        let store = price_store(&[100, 200, 100]);

        let bitmap = store.filter_eq_int_bitmap("price", 100);

        assert_rows(&bitmap, 3, &[0, 2]);
    }

    #[test]
    fn test_filter_eq_string_bitmap() {
        let store = category_store(&["tech", "science", "tech"]);

        let bitmap = store.filter_eq_string_bitmap("category", "tech");

        assert_rows(&bitmap, 3, &[0, 2]);
    }

    #[test]
    fn test_filter_range_int_bitmap() {
        let store = price_store(&[50, 100, 150, 200]);

        let bitmap = store.filter_range_int_bitmap("price", 75, 175);

        assert_rows(&bitmap, 4, &[1, 2]);
    }

    #[test]
    fn test_bitmap_and() {
        let store = price_category_store(&[(100, "tech"), (200, "tech"), (100, "science")]);

        let by_price = store.filter_eq_int_bitmap("price", 100);
        let by_category = store.filter_eq_string_bitmap("category", "tech");
        let both = ColumnStore::bitmap_and(&by_price, &by_category);

        // Only row 0 satisfies both conditions.
        assert_rows(&both, 3, &[0]);
    }

    #[test]
    fn test_bitmap_or() {
        let store = price_category_store(&[(100, "tech"), (200, "science"), (300, "science")]);

        let by_price = store.filter_eq_int_bitmap("price", 100);
        let by_category = store.filter_eq_string_bitmap("category", "science");
        let either = ColumnStore::bitmap_or(&by_price, &by_category);

        // Row 0 matches on price, rows 1 and 2 on category.
        assert_rows(&either, 3, &[0, 1, 2]);
    }

    #[test]
    fn test_filter_bitmap_missing_column() {
        let store = price_store(&[]);

        assert!(store.filter_eq_int_bitmap("missing", 100).is_empty());
    }

    #[test]
    fn test_filter_bitmap_missing_string_value() {
        let store = category_store(&["tech"]);

        // "nonexistent" was never interned, so the bitmap must be empty.
        let bitmap = store.filter_eq_string_bitmap("category", "nonexistent");

        assert!(bitmap.is_empty());
    }
}