velesdb_core/
column_store.rs

1//! Column-oriented storage for high-performance metadata filtering.
2//!
3//! This module provides a columnar storage format for frequently filtered fields,
4//! avoiding the overhead of JSON parsing during filter operations.
5//!
6//! # Performance Goals
7//!
8//! - Maintain 50M+ items/sec throughput at 100k items (vs 19M/s with JSON)
9//! - Cache-friendly sequential memory access
10//! - Support for common filter operations: Eq, Gt, Lt, In, Range
11//!
12//! # Architecture
13//!
14//! ```text
15//! ColumnStore
16//! ├── columns: HashMap<field_name, TypedColumn>
17//! │   ├── "category" -> StringColumn(Vec<Option<StringId>>)
18//! │   ├── "price"    -> IntColumn(Vec<Option<i64>>)
19//! │   └── "rating"   -> FloatColumn(Vec<Option<f64>>)
20//! └── string_table: StringTable (interning for strings)
21//! ```
22
23use roaring::RoaringBitmap;
24use rustc_hash::FxHashMap;
25use std::collections::HashMap;
26
27/// Interned string ID for fast equality comparisons.
28#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
29pub struct StringId(u32);
30
31/// String interning table for fast string comparisons.
32#[derive(Debug, Default)]
33pub struct StringTable {
34    /// String to ID mapping
35    string_to_id: FxHashMap<String, StringId>,
36    /// ID to string mapping (for retrieval)
37    id_to_string: Vec<String>,
38}
39
40impl StringTable {
41    /// Creates a new empty string table.
42    #[must_use]
43    pub fn new() -> Self {
44        Self::default()
45    }
46
47    /// Interns a string, returning its ID.
48    ///
49    /// If the string already exists, returns the existing ID.
50    pub fn intern(&mut self, s: &str) -> StringId {
51        if let Some(&id) = self.string_to_id.get(s) {
52            return id;
53        }
54
55        #[allow(clippy::cast_possible_truncation)]
56        let id = StringId(self.id_to_string.len() as u32);
57        self.id_to_string.push(s.to_string());
58        self.string_to_id.insert(s.to_string(), id);
59        id
60    }
61
62    /// Gets the string for an ID.
63    #[must_use]
64    pub fn get(&self, id: StringId) -> Option<&str> {
65        self.id_to_string.get(id.0 as usize).map(String::as_str)
66    }
67
68    /// Gets the ID for a string without interning.
69    #[must_use]
70    pub fn get_id(&self, s: &str) -> Option<StringId> {
71        self.string_to_id.get(s).copied()
72    }
73
74    /// Returns the number of interned strings.
75    #[must_use]
76    pub fn len(&self) -> usize {
77        self.id_to_string.len()
78    }
79
80    /// Returns true if the table is empty.
81    #[must_use]
82    pub fn is_empty(&self) -> bool {
83        self.id_to_string.is_empty()
84    }
85}
86
87/// A typed column storing values of a specific type.
88#[derive(Debug)]
89pub enum TypedColumn {
90    /// Integer column (i64)
91    Int(Vec<Option<i64>>),
92    /// Float column (f64)
93    Float(Vec<Option<f64>>),
94    /// String column (interned IDs)
95    String(Vec<Option<StringId>>),
96    /// Boolean column
97    Bool(Vec<Option<bool>>),
98}
99
100impl TypedColumn {
101    /// Creates a new integer column with the given capacity.
102    #[must_use]
103    pub fn new_int(capacity: usize) -> Self {
104        Self::Int(Vec::with_capacity(capacity))
105    }
106
107    /// Creates a new float column with the given capacity.
108    #[must_use]
109    pub fn new_float(capacity: usize) -> Self {
110        Self::Float(Vec::with_capacity(capacity))
111    }
112
113    /// Creates a new string column with the given capacity.
114    #[must_use]
115    pub fn new_string(capacity: usize) -> Self {
116        Self::String(Vec::with_capacity(capacity))
117    }
118
119    /// Creates a new boolean column with the given capacity.
120    #[must_use]
121    pub fn new_bool(capacity: usize) -> Self {
122        Self::Bool(Vec::with_capacity(capacity))
123    }
124
125    /// Returns the number of values in the column.
126    #[must_use]
127    pub fn len(&self) -> usize {
128        match self {
129            Self::Int(v) => v.len(),
130            Self::Float(v) => v.len(),
131            Self::String(v) => v.len(),
132            Self::Bool(v) => v.len(),
133        }
134    }
135
136    /// Returns true if the column is empty.
137    #[must_use]
138    pub fn is_empty(&self) -> bool {
139        self.len() == 0
140    }
141
142    /// Pushes a null value to the column.
143    pub fn push_null(&mut self) {
144        match self {
145            Self::Int(v) => v.push(None),
146            Self::Float(v) => v.push(None),
147            Self::String(v) => v.push(None),
148            Self::Bool(v) => v.push(None),
149        }
150    }
151}
152
153/// Column store for high-performance filtering.
154#[derive(Debug, Default)]
155pub struct ColumnStore {
156    /// Columns indexed by field name
157    columns: HashMap<String, TypedColumn>,
158    /// String interning table
159    string_table: StringTable,
160    /// Number of rows
161    row_count: usize,
162}
163
164impl ColumnStore {
165    /// Creates a new empty column store.
166    #[must_use]
167    pub fn new() -> Self {
168        Self::default()
169    }
170
171    /// Creates a column store with pre-defined indexed fields.
172    ///
173    /// # Arguments
174    ///
175    /// * `fields` - List of (`field_name`, `field_type`) tuples
176    #[must_use]
177    pub fn with_schema(fields: &[(&str, ColumnType)]) -> Self {
178        let mut store = Self::new();
179        for (name, col_type) in fields {
180            store.add_column(name, *col_type);
181        }
182        store
183    }
184
185    /// Adds a new column to the store.
186    pub fn add_column(&mut self, name: &str, col_type: ColumnType) {
187        let column = match col_type {
188            ColumnType::Int => TypedColumn::new_int(0),
189            ColumnType::Float => TypedColumn::new_float(0),
190            ColumnType::String => TypedColumn::new_string(0),
191            ColumnType::Bool => TypedColumn::new_bool(0),
192        };
193        self.columns.insert(name.to_string(), column);
194    }
195
196    /// Returns the number of rows in the store.
197    #[must_use]
198    pub fn row_count(&self) -> usize {
199        self.row_count
200    }
201
202    /// Returns the string table for string interning.
203    #[must_use]
204    pub fn string_table(&self) -> &StringTable {
205        &self.string_table
206    }
207
208    /// Returns a mutable reference to the string table.
209    pub fn string_table_mut(&mut self) -> &mut StringTable {
210        &mut self.string_table
211    }
212
213    /// Pushes values for a new row.
214    ///
215    /// Missing fields will be set to null.
216    pub fn push_row(&mut self, values: &[(&str, ColumnValue)]) {
217        // Build a map of provided values
218        let value_map: FxHashMap<&str, &ColumnValue> =
219            values.iter().map(|(k, v)| (*k, v)).collect();
220
221        // Update each column
222        for (name, column) in &mut self.columns {
223            if let Some(value) = value_map.get(name.as_str()) {
224                match value {
225                    ColumnValue::Null => column.push_null(),
226                    ColumnValue::Int(v) => {
227                        if let TypedColumn::Int(col) = column {
228                            col.push(Some(*v));
229                        } else {
230                            column.push_null();
231                        }
232                    }
233                    ColumnValue::Float(v) => {
234                        if let TypedColumn::Float(col) = column {
235                            col.push(Some(*v));
236                        } else {
237                            column.push_null();
238                        }
239                    }
240                    ColumnValue::String(id) => {
241                        if let TypedColumn::String(col) = column {
242                            col.push(Some(*id));
243                        } else {
244                            column.push_null();
245                        }
246                    }
247                    ColumnValue::Bool(v) => {
248                        if let TypedColumn::Bool(col) = column {
249                            col.push(Some(*v));
250                        } else {
251                            column.push_null();
252                        }
253                    }
254                }
255            } else {
256                column.push_null();
257            }
258        }
259
260        self.row_count += 1;
261    }
262
263    /// Gets a column by name.
264    #[must_use]
265    pub fn get_column(&self, name: &str) -> Option<&TypedColumn> {
266        self.columns.get(name)
267    }
268
269    /// Filters rows by equality on an integer column.
270    ///
271    /// Returns a vector of row indices that match.
272    #[must_use]
273    pub fn filter_eq_int(&self, column: &str, value: i64) -> Vec<usize> {
274        let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
275            return Vec::new();
276        };
277
278        col.iter()
279            .enumerate()
280            .filter_map(|(idx, v)| if *v == Some(value) { Some(idx) } else { None })
281            .collect()
282    }
283
284    /// Filters rows by equality on a string column.
285    ///
286    /// Returns a vector of row indices that match.
287    #[must_use]
288    pub fn filter_eq_string(&self, column: &str, value: &str) -> Vec<usize> {
289        let Some(TypedColumn::String(col)) = self.columns.get(column) else {
290            return Vec::new();
291        };
292
293        let Some(string_id) = self.string_table.get_id(value) else {
294            return Vec::new(); // String not in table, no matches
295        };
296
297        col.iter()
298            .enumerate()
299            .filter_map(|(idx, v)| {
300                if *v == Some(string_id) {
301                    Some(idx)
302                } else {
303                    None
304                }
305            })
306            .collect()
307    }
308
309    /// Filters rows by range on an integer column (value > threshold).
310    ///
311    /// Returns a vector of row indices that match.
312    #[must_use]
313    pub fn filter_gt_int(&self, column: &str, threshold: i64) -> Vec<usize> {
314        let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
315            return Vec::new();
316        };
317
318        col.iter()
319            .enumerate()
320            .filter_map(|(idx, v)| match v {
321                Some(val) if *val > threshold => Some(idx),
322                _ => None,
323            })
324            .collect()
325    }
326
327    /// Filters rows by range on an integer column (value < threshold).
328    #[must_use]
329    pub fn filter_lt_int(&self, column: &str, threshold: i64) -> Vec<usize> {
330        let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
331            return Vec::new();
332        };
333
334        col.iter()
335            .enumerate()
336            .filter_map(|(idx, v)| match v {
337                Some(val) if *val < threshold => Some(idx),
338                _ => None,
339            })
340            .collect()
341    }
342
343    /// Filters rows by range on an integer column (low < value < high).
344    #[must_use]
345    pub fn filter_range_int(&self, column: &str, low: i64, high: i64) -> Vec<usize> {
346        let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
347            return Vec::new();
348        };
349
350        col.iter()
351            .enumerate()
352            .filter_map(|(idx, v)| match v {
353                Some(val) if *val > low && *val < high => Some(idx),
354                _ => None,
355            })
356            .collect()
357    }
358
359    /// Filters rows by IN clause on a string column.
360    ///
361    /// Returns a vector of row indices that match any of the values.
362    #[must_use]
363    pub fn filter_in_string(&self, column: &str, values: &[&str]) -> Vec<usize> {
364        let Some(TypedColumn::String(col)) = self.columns.get(column) else {
365            return Vec::new();
366        };
367
368        // Convert string values to IDs
369        let ids: Vec<StringId> = values
370            .iter()
371            .filter_map(|s| self.string_table.get_id(s))
372            .collect();
373
374        if ids.is_empty() {
375            return Vec::new();
376        }
377
378        col.iter()
379            .enumerate()
380            .filter_map(|(idx, v)| match v {
381                Some(id) if ids.contains(id) => Some(idx),
382                _ => None,
383            })
384            .collect()
385    }
386
387    /// Counts rows matching equality on an integer column.
388    ///
389    /// More efficient than `filter_eq_int().len()` as it doesn't allocate.
390    #[must_use]
391    pub fn count_eq_int(&self, column: &str, value: i64) -> usize {
392        let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
393            return 0;
394        };
395
396        col.iter().filter(|v| **v == Some(value)).count()
397    }
398
399    /// Counts rows matching equality on a string column.
400    #[must_use]
401    pub fn count_eq_string(&self, column: &str, value: &str) -> usize {
402        let Some(TypedColumn::String(col)) = self.columns.get(column) else {
403            return 0;
404        };
405
406        let Some(string_id) = self.string_table.get_id(value) else {
407            return 0;
408        };
409
410        col.iter().filter(|v| **v == Some(string_id)).count()
411    }
412
413    // =========================================================================
414    // Optimized Bitmap-based Filtering (for 100k+ items)
415    // =========================================================================
416
417    /// Filters rows by equality on an integer column, returning a bitmap.
418    ///
419    /// Uses `RoaringBitmap` for memory-efficient storage of matching indices.
420    /// Useful for combining multiple filters with AND/OR operations.
421    #[must_use]
422    #[allow(clippy::cast_possible_truncation)]
423    pub fn filter_eq_int_bitmap(&self, column: &str, value: i64) -> RoaringBitmap {
424        let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
425            return RoaringBitmap::new();
426        };
427
428        col.iter()
429            .enumerate()
430            .filter_map(|(idx, v)| {
431                if *v == Some(value) {
432                    Some(idx as u32)
433                } else {
434                    None
435                }
436            })
437            .collect()
438    }
439
440    /// Filters rows by equality on a string column, returning a bitmap.
441    #[must_use]
442    #[allow(clippy::cast_possible_truncation)]
443    pub fn filter_eq_string_bitmap(&self, column: &str, value: &str) -> RoaringBitmap {
444        let Some(TypedColumn::String(col)) = self.columns.get(column) else {
445            return RoaringBitmap::new();
446        };
447
448        let Some(string_id) = self.string_table.get_id(value) else {
449            return RoaringBitmap::new();
450        };
451
452        col.iter()
453            .enumerate()
454            .filter_map(|(idx, v)| {
455                if *v == Some(string_id) {
456                    Some(idx as u32)
457                } else {
458                    None
459                }
460            })
461            .collect()
462    }
463
464    /// Filters rows by range on an integer column, returning a bitmap.
465    #[must_use]
466    #[allow(clippy::cast_possible_truncation)]
467    pub fn filter_range_int_bitmap(&self, column: &str, low: i64, high: i64) -> RoaringBitmap {
468        let Some(TypedColumn::Int(col)) = self.columns.get(column) else {
469            return RoaringBitmap::new();
470        };
471
472        col.iter()
473            .enumerate()
474            .filter_map(|(idx, v)| match v {
475                Some(val) if *val > low && *val < high => Some(idx as u32),
476                _ => None,
477            })
478            .collect()
479    }
480
481    /// Combines two filter results using AND.
482    ///
483    /// Returns indices that are in both bitmaps.
484    #[must_use]
485    pub fn bitmap_and(a: &RoaringBitmap, b: &RoaringBitmap) -> RoaringBitmap {
486        a & b
487    }
488
489    /// Combines two filter results using OR.
490    ///
491    /// Returns indices that are in either bitmap.
492    #[must_use]
493    pub fn bitmap_or(a: &RoaringBitmap, b: &RoaringBitmap) -> RoaringBitmap {
494        a | b
495    }
496}
497
498/// Column type for schema definition.
499#[derive(Debug, Clone, Copy, PartialEq, Eq)]
500pub enum ColumnType {
501    /// 64-bit signed integer
502    Int,
503    /// 64-bit floating point
504    Float,
505    /// Interned string
506    String,
507    /// Boolean
508    Bool,
509}
510
511/// A value that can be stored in a column.
512#[derive(Debug, Clone)]
513pub enum ColumnValue {
514    /// Integer value
515    Int(i64),
516    /// Float value
517    Float(f64),
518    /// String ID (must be interned first)
519    String(StringId),
520    /// Boolean value
521    Bool(bool),
522    /// Null value
523    Null,
524}
525
526#[cfg(test)]
527mod tests {
528    use super::*;
529
530    // =========================================================================
531    // TDD Tests for StringTable
532    // =========================================================================
533
534    #[test]
535    fn test_string_table_intern() {
536        // Arrange
537        let mut table = StringTable::new();
538
539        // Act
540        let id1 = table.intern("hello");
541        let id2 = table.intern("world");
542        let id3 = table.intern("hello"); // Same as id1
543
544        // Assert
545        assert_eq!(id1, id3);
546        assert_ne!(id1, id2);
547        assert_eq!(table.len(), 2);
548    }
549
550    #[test]
551    fn test_string_table_get() {
552        // Arrange
553        let mut table = StringTable::new();
554        let id = table.intern("test");
555
556        // Act & Assert
557        assert_eq!(table.get(id), Some("test"));
558    }
559
560    #[test]
561    fn test_string_table_get_id() {
562        // Arrange
563        let mut table = StringTable::new();
564        table.intern("existing");
565
566        // Act & Assert
567        assert!(table.get_id("existing").is_some());
568        assert!(table.get_id("missing").is_none());
569    }
570
571    // =========================================================================
572    // TDD Tests for ColumnStore - Basic Operations
573    // =========================================================================
574
575    #[test]
576    fn test_column_store_new() {
577        // Arrange & Act
578        let store = ColumnStore::new();
579
580        // Assert
581        assert_eq!(store.row_count(), 0);
582    }
583
584    #[test]
585    fn test_column_store_with_schema() {
586        // Arrange & Act
587        let store = ColumnStore::with_schema(&[
588            ("category", ColumnType::String),
589            ("price", ColumnType::Int),
590        ]);
591
592        // Assert
593        assert!(store.get_column("category").is_some());
594        assert!(store.get_column("price").is_some());
595        assert!(store.get_column("missing").is_none());
596    }
597
598    #[test]
599    fn test_column_store_push_row() {
600        // Arrange
601        let mut store = ColumnStore::with_schema(&[
602            ("category", ColumnType::String),
603            ("price", ColumnType::Int),
604        ]);
605
606        let cat_id = store.string_table_mut().intern("tech");
607
608        // Act
609        store.push_row(&[
610            ("category", ColumnValue::String(cat_id)),
611            ("price", ColumnValue::Int(100)),
612        ]);
613
614        // Assert
615        assert_eq!(store.row_count(), 1);
616    }
617
618    // =========================================================================
619    // TDD Tests for ColumnStore - Filtering
620    // =========================================================================
621
622    #[test]
623    fn test_filter_eq_int() {
624        // Arrange
625        let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
626        store.push_row(&[("price", ColumnValue::Int(100))]);
627        store.push_row(&[("price", ColumnValue::Int(200))]);
628        store.push_row(&[("price", ColumnValue::Int(100))]);
629
630        // Act
631        let matches = store.filter_eq_int("price", 100);
632
633        // Assert
634        assert_eq!(matches, vec![0, 2]);
635    }
636
637    #[test]
638    fn test_filter_eq_string() {
639        // Arrange
640        let mut store = ColumnStore::with_schema(&[("category", ColumnType::String)]);
641
642        let tech_id = store.string_table_mut().intern("tech");
643        let science_id = store.string_table_mut().intern("science");
644
645        store.push_row(&[("category", ColumnValue::String(tech_id))]);
646        store.push_row(&[("category", ColumnValue::String(science_id))]);
647        store.push_row(&[("category", ColumnValue::String(tech_id))]);
648
649        // Act
650        let matches = store.filter_eq_string("category", "tech");
651
652        // Assert
653        assert_eq!(matches, vec![0, 2]);
654    }
655
656    #[test]
657    fn test_filter_gt_int() {
658        // Arrange
659        let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
660        store.push_row(&[("price", ColumnValue::Int(50))]);
661        store.push_row(&[("price", ColumnValue::Int(100))]);
662        store.push_row(&[("price", ColumnValue::Int(150))]);
663
664        // Act
665        let matches = store.filter_gt_int("price", 75);
666
667        // Assert
668        assert_eq!(matches, vec![1, 2]);
669    }
670
671    #[test]
672    fn test_filter_lt_int() {
673        // Arrange
674        let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
675        store.push_row(&[("price", ColumnValue::Int(50))]);
676        store.push_row(&[("price", ColumnValue::Int(100))]);
677        store.push_row(&[("price", ColumnValue::Int(150))]);
678
679        // Act
680        let matches = store.filter_lt_int("price", 100);
681
682        // Assert
683        assert_eq!(matches, vec![0]);
684    }
685
686    #[test]
687    fn test_filter_range_int() {
688        // Arrange
689        let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
690        store.push_row(&[("price", ColumnValue::Int(50))]);
691        store.push_row(&[("price", ColumnValue::Int(100))]);
692        store.push_row(&[("price", ColumnValue::Int(150))]);
693        store.push_row(&[("price", ColumnValue::Int(200))]);
694
695        // Act
696        let matches = store.filter_range_int("price", 75, 175);
697
698        // Assert
699        assert_eq!(matches, vec![1, 2]);
700    }
701
702    #[test]
703    fn test_filter_in_string() {
704        // Arrange
705        let mut store = ColumnStore::with_schema(&[("category", ColumnType::String)]);
706
707        let tech_id = store.string_table_mut().intern("tech");
708        let science_id = store.string_table_mut().intern("science");
709        let art_id = store.string_table_mut().intern("art");
710
711        store.push_row(&[("category", ColumnValue::String(tech_id))]);
712        store.push_row(&[("category", ColumnValue::String(science_id))]);
713        store.push_row(&[("category", ColumnValue::String(art_id))]);
714        store.push_row(&[("category", ColumnValue::String(tech_id))]);
715
716        // Act
717        let matches = store.filter_in_string("category", &["tech", "art"]);
718
719        // Assert
720        assert_eq!(matches, vec![0, 2, 3]);
721    }
722
723    #[test]
724    fn test_filter_with_null_values() {
725        // Arrange
726        let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
727        store.push_row(&[("price", ColumnValue::Int(100))]);
728        store.push_row(&[("price", ColumnValue::Null)]);
729        store.push_row(&[("price", ColumnValue::Int(100))]);
730
731        // Act
732        let matches = store.filter_eq_int("price", 100);
733
734        // Assert - nulls should not match
735        assert_eq!(matches, vec![0, 2]);
736    }
737
738    #[test]
739    fn test_filter_missing_column() {
740        // Arrange
741        let store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
742
743        // Act
744        let matches = store.filter_eq_int("missing", 100);
745
746        // Assert
747        assert!(matches.is_empty());
748    }
749
750    // =========================================================================
751    // TDD Tests for ColumnStore - Count Operations
752    // =========================================================================
753
754    #[test]
755    fn test_count_eq_int() {
756        // Arrange
757        let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
758        store.push_row(&[("price", ColumnValue::Int(100))]);
759        store.push_row(&[("price", ColumnValue::Int(200))]);
760        store.push_row(&[("price", ColumnValue::Int(100))]);
761
762        // Act
763        let count = store.count_eq_int("price", 100);
764
765        // Assert
766        assert_eq!(count, 2);
767    }
768
769    #[test]
770    fn test_count_eq_string() {
771        // Arrange
772        let mut store = ColumnStore::with_schema(&[("category", ColumnType::String)]);
773
774        let tech_id = store.string_table_mut().intern("tech");
775        let science_id = store.string_table_mut().intern("science");
776
777        store.push_row(&[("category", ColumnValue::String(tech_id))]);
778        store.push_row(&[("category", ColumnValue::String(science_id))]);
779        store.push_row(&[("category", ColumnValue::String(tech_id))]);
780
781        // Act
782        let count = store.count_eq_string("category", "tech");
783
784        // Assert
785        assert_eq!(count, 2);
786    }
787
788    // =========================================================================
789    // TDD Tests for Bitmap Operations
790    // =========================================================================
791
792    #[test]
793    fn test_filter_eq_int_bitmap() {
794        // Arrange
795        let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
796        store.push_row(&[("price", ColumnValue::Int(100))]);
797        store.push_row(&[("price", ColumnValue::Int(200))]);
798        store.push_row(&[("price", ColumnValue::Int(100))]);
799
800        // Act
801        let bitmap = store.filter_eq_int_bitmap("price", 100);
802
803        // Assert
804        assert!(bitmap.contains(0));
805        assert!(!bitmap.contains(1));
806        assert!(bitmap.contains(2));
807        assert_eq!(bitmap.len(), 2);
808    }
809
810    #[test]
811    fn test_filter_eq_string_bitmap() {
812        // Arrange
813        let mut store = ColumnStore::with_schema(&[("category", ColumnType::String)]);
814
815        let tech_id = store.string_table_mut().intern("tech");
816        let science_id = store.string_table_mut().intern("science");
817
818        store.push_row(&[("category", ColumnValue::String(tech_id))]);
819        store.push_row(&[("category", ColumnValue::String(science_id))]);
820        store.push_row(&[("category", ColumnValue::String(tech_id))]);
821
822        // Act
823        let bitmap = store.filter_eq_string_bitmap("category", "tech");
824
825        // Assert
826        assert!(bitmap.contains(0));
827        assert!(!bitmap.contains(1));
828        assert!(bitmap.contains(2));
829        assert_eq!(bitmap.len(), 2);
830    }
831
832    #[test]
833    fn test_filter_range_int_bitmap() {
834        // Arrange
835        let mut store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
836        store.push_row(&[("price", ColumnValue::Int(50))]);
837        store.push_row(&[("price", ColumnValue::Int(100))]);
838        store.push_row(&[("price", ColumnValue::Int(150))]);
839        store.push_row(&[("price", ColumnValue::Int(200))]);
840
841        // Act
842        let bitmap = store.filter_range_int_bitmap("price", 75, 175);
843
844        // Assert
845        assert!(!bitmap.contains(0));
846        assert!(bitmap.contains(1));
847        assert!(bitmap.contains(2));
848        assert!(!bitmap.contains(3));
849        assert_eq!(bitmap.len(), 2);
850    }
851
852    #[test]
853    fn test_bitmap_and() {
854        // Arrange
855        let mut store = ColumnStore::with_schema(&[
856            ("price", ColumnType::Int),
857            ("category", ColumnType::String),
858        ]);
859
860        let tech_id = store.string_table_mut().intern("tech");
861        let science_id = store.string_table_mut().intern("science");
862
863        store.push_row(&[
864            ("price", ColumnValue::Int(100)),
865            ("category", ColumnValue::String(tech_id)),
866        ]);
867        store.push_row(&[
868            ("price", ColumnValue::Int(200)),
869            ("category", ColumnValue::String(tech_id)),
870        ]);
871        store.push_row(&[
872            ("price", ColumnValue::Int(100)),
873            ("category", ColumnValue::String(science_id)),
874        ]);
875
876        // Act
877        let price_bitmap = store.filter_eq_int_bitmap("price", 100);
878        let category_bitmap = store.filter_eq_string_bitmap("category", "tech");
879        let combined = ColumnStore::bitmap_and(&price_bitmap, &category_bitmap);
880
881        // Assert - only row 0 matches both conditions
882        assert!(combined.contains(0));
883        assert!(!combined.contains(1));
884        assert!(!combined.contains(2));
885        assert_eq!(combined.len(), 1);
886    }
887
888    #[test]
889    fn test_bitmap_or() {
890        // Arrange
891        let mut store = ColumnStore::with_schema(&[
892            ("price", ColumnType::Int),
893            ("category", ColumnType::String),
894        ]);
895
896        let tech_id = store.string_table_mut().intern("tech");
897        let science_id = store.string_table_mut().intern("science");
898
899        store.push_row(&[
900            ("price", ColumnValue::Int(100)),
901            ("category", ColumnValue::String(tech_id)),
902        ]);
903        store.push_row(&[
904            ("price", ColumnValue::Int(200)),
905            ("category", ColumnValue::String(science_id)),
906        ]);
907        store.push_row(&[
908            ("price", ColumnValue::Int(300)),
909            ("category", ColumnValue::String(science_id)),
910        ]);
911
912        // Act
913        let price_bitmap = store.filter_eq_int_bitmap("price", 100);
914        let category_bitmap = store.filter_eq_string_bitmap("category", "science");
915        let combined = ColumnStore::bitmap_or(&price_bitmap, &category_bitmap);
916
917        // Assert - rows 0, 1, 2 match (0 for price, 1 and 2 for category)
918        assert!(combined.contains(0));
919        assert!(combined.contains(1));
920        assert!(combined.contains(2));
921        assert_eq!(combined.len(), 3);
922    }
923
924    #[test]
925    fn test_filter_bitmap_missing_column() {
926        // Arrange
927        let store = ColumnStore::with_schema(&[("price", ColumnType::Int)]);
928
929        // Act
930        let bitmap = store.filter_eq_int_bitmap("missing", 100);
931
932        // Assert
933        assert!(bitmap.is_empty());
934    }
935
936    #[test]
937    fn test_filter_bitmap_missing_string_value() {
938        // Arrange
939        let mut store = ColumnStore::with_schema(&[("category", ColumnType::String)]);
940        let tech_id = store.string_table_mut().intern("tech");
941        store.push_row(&[("category", ColumnValue::String(tech_id))]);
942
943        // Act - search for a string that was never interned
944        let bitmap = store.filter_eq_string_bitmap("category", "nonexistent");
945
946        // Assert
947        assert!(bitmap.is_empty());
948    }
949
950    #[test]
951    fn test_count_eq_string_missing_value() {
952        // Arrange
953        let mut store = ColumnStore::with_schema(&[("category", ColumnType::String)]);
954        let tech_id = store.string_table_mut().intern("tech");
955        store.push_row(&[("category", ColumnValue::String(tech_id))]);
956
957        // Act - count a string that was never interned
958        let count = store.count_eq_string("category", "nonexistent");
959
960        // Assert
961        assert_eq!(count, 0);
962    }
963
964    #[test]
965    fn test_add_column() {
966        // Arrange
967        let mut store = ColumnStore::new();
968
969        // Act
970        store.add_column("price", ColumnType::Int);
971        store.add_column("rating", ColumnType::Float);
972
973        // Assert
974        assert!(store.get_column("price").is_some());
975        assert!(store.get_column("rating").is_some());
976    }
977}