// sochdb_query/simd_filter.rs

1// Copyright 2025 Sushanth (https://github.com/sushanthpy)
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! SIMD Vectorized Query Filters
16//!
17//! From mm.md Task 5.3: AVX-512/NEON Vectorized Filters (VQE)
18//!
19//! ## Problem
20//!
21//! Current filtering is scalar. LLM context queries often filter millions of rows
22//! (e.g., "events from last 7 days"). SIMD can evaluate 8-16 predicates per instruction.
23//!
24//! ## Solution
25//!
26//! Column-oriented data layout + compiled filter expressions + runtime SIMD feature detection
27//!
28//! ## Throughput Analysis
29//!
30//! ```text
31//! Scalar: 1 comparison/cycle × 3GHz = 3B comparisons/sec
32//! AVX-512: 8 comparisons/instruction × ~1 CPI × 3GHz = 24B/sec
//! AVX2 (256-bit): 4 comparisons/instruction = 12B/sec
//! NEON (128-bit): 2 comparisons/instruction (64-bit lanes) = 6B/sec
//!
//! 100M rows @ 24B/sec = 4.2ms (AVX-512)
//! 100M rows @ 3B/sec = 33ms (scalar)
//!
//! Speedup: 8× (AVX-512), 4× (AVX2), 2× (NEON, 64-bit lanes)
//! ```
41
42
/// Result bitmap - bit per row indicating pass/fail
///
/// Packed `u64` words: row `i` maps to bit `i % 64` of word `i / 64`.
pub type FilterBitmap = Vec<u64>;

/// Filter comparison operation
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FilterOp {
    /// `value == operand`
    Equal,
    /// `value != operand`
    NotEqual,
    /// `value < operand`
    LessThan,
    /// `value <= operand`
    LessEqual,
    /// `value > operand`
    GreaterThan,
    /// `value >= operand`
    GreaterEqual,
    /// `value` is NULL
    IsNull,
    /// `value` is not NULL
    IsNotNull,
}

/// Allocate a zeroed bitmap with capacity for `num_rows` bits.
#[inline]
pub fn allocate_bitmap(num_rows: usize) -> FilterBitmap {
    // div_ceil replaces the manual `(n + 63) / 64`, which overflows
    // for num_rows near usize::MAX.
    vec![0u64; num_rows.div_ceil(64)]
}

/// Set a bit in the bitmap.
///
/// Out-of-range indices are silently ignored.
#[inline]
pub fn set_bit(bitmap: &mut FilterBitmap, idx: usize) {
    if let Some(word) = bitmap.get_mut(idx / 64) {
        *word |= 1u64 << (idx % 64);
    }
}

/// Check if a bit is set.
///
/// Out-of-range indices read as `false`.
#[inline]
pub fn get_bit(bitmap: &FilterBitmap, idx: usize) -> bool {
    bitmap
        .get(idx / 64)
        .map_or(false, |word| (word >> (idx % 64)) & 1 == 1)
}

/// Count set bits in bitmap
pub fn popcount(bitmap: &FilterBitmap) -> usize {
    bitmap.iter().map(|w| w.count_ones() as usize).sum()
}

/// AND two bitmaps together.
///
/// The result is truncated to the shorter of the two inputs.
pub fn bitmap_and(a: &FilterBitmap, b: &FilterBitmap) -> FilterBitmap {
    a.iter().zip(b).map(|(x, y)| x & y).collect()
}

/// OR two bitmaps together.
///
/// The result is truncated to the shorter of the two inputs.
pub fn bitmap_or(a: &FilterBitmap, b: &FilterBitmap) -> FilterBitmap {
    a.iter().zip(b).map(|(x, y)| x | y).collect()
}

/// NOT a bitmap.
///
/// NOTE: this also flips the padding bits past the last row in the final
/// word; callers that popcount the result must account for them.
pub fn bitmap_not(a: &FilterBitmap) -> FilterBitmap {
    a.iter().map(|x| !x).collect()
}
106
107// =============================================================================
108// Scalar Implementations (Fallback)
109// =============================================================================
110
111/// Scalar filter: i64 > threshold
112pub fn filter_i64_gt_scalar(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
113    for (idx, &value) in data.iter().enumerate() {
114        if value > threshold {
115            set_bit(result, idx);
116        }
117    }
118}
119
120/// Scalar filter: i64 >= threshold
121pub fn filter_i64_ge_scalar(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
122    for (idx, &value) in data.iter().enumerate() {
123        if value >= threshold {
124            set_bit(result, idx);
125        }
126    }
127}
128
129/// Scalar filter: i64 < threshold
130pub fn filter_i64_lt_scalar(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
131    for (idx, &value) in data.iter().enumerate() {
132        if value < threshold {
133            set_bit(result, idx);
134        }
135    }
136}
137
138/// Scalar filter: i64 == value
139pub fn filter_i64_eq_scalar(data: &[i64], target: i64, result: &mut FilterBitmap) {
140    for (idx, &value) in data.iter().enumerate() {
141        if value == target {
142            set_bit(result, idx);
143        }
144    }
145}
146
147/// Scalar filter: i64 between low and high (inclusive)
148pub fn filter_i64_between_scalar(data: &[i64], low: i64, high: i64, result: &mut FilterBitmap) {
149    for (idx, &value) in data.iter().enumerate() {
150        if value >= low && value <= high {
151            set_bit(result, idx);
152        }
153    }
154}
155
156/// Scalar filter: f64 > threshold
157pub fn filter_f64_gt_scalar(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
158    for (idx, &value) in data.iter().enumerate() {
159        if value > threshold {
160            set_bit(result, idx);
161        }
162    }
163}
164
165/// Scalar filter: f64 < threshold
166pub fn filter_f64_lt_scalar(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
167    for (idx, &value) in data.iter().enumerate() {
168        if value < threshold {
169            set_bit(result, idx);
170        }
171    }
172}
173
174// =============================================================================
175// AVX2 SIMD Implementations (x86_64)
176// =============================================================================
177
178#[cfg(target_arch = "x86_64")]
179mod avx2 {
180    use super::*;
181    use std::arch::x86_64::*;
182
183    /// Check if AVX2 is available
184    #[inline]
185    pub fn is_available() -> bool {
186        is_x86_feature_detected!("avx2")
187    }
188
189    /// AVX2 filter: i64 > threshold
190    ///
191    /// Processes 4 i64 values per iteration using 256-bit vectors.
192    #[target_feature(enable = "avx2")]
193    pub unsafe fn filter_i64_gt_avx2(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
194        let threshold_vec = _mm256_set1_epi64x(threshold);
195        let len = data.len();
196        let chunks = len / 4;
197
198        for chunk_idx in 0..chunks {
199            let offset = chunk_idx * 4;
200            let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
201
202            // Compare greater than
203            let cmp = _mm256_cmpgt_epi64(data_vec, threshold_vec);
204
205            // Extract mask
206            let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
207
208            // Set bits in result
209            let word_idx = offset / 64;
210            let bit_offset = offset % 64;
211            if word_idx < result.len() {
212                result[word_idx] |= mask << bit_offset;
213                // Handle overflow to next word
214                if bit_offset > 60 && word_idx + 1 < result.len() {
215                    result[word_idx + 1] |= mask >> (64 - bit_offset);
216                }
217            }
218        }
219
220        // Handle remainder with scalar
221        let remainder_start = chunks * 4;
222        for idx in remainder_start..len {
223            if data[idx] > threshold {
224                set_bit(result, idx);
225            }
226        }
227    }
228
229    /// AVX2 filter: i64 < threshold
230    #[target_feature(enable = "avx2")]
231    pub unsafe fn filter_i64_lt_avx2(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
232        let threshold_vec = _mm256_set1_epi64x(threshold);
233        let len = data.len();
234        let chunks = len / 4;
235
236        for chunk_idx in 0..chunks {
237            let offset = chunk_idx * 4;
238            let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
239
240            // Compare: data < threshold is equivalent to threshold > data
241            let cmp = _mm256_cmpgt_epi64(threshold_vec, data_vec);
242            let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
243
244            let word_idx = offset / 64;
245            let bit_offset = offset % 64;
246            if word_idx < result.len() {
247                result[word_idx] |= mask << bit_offset;
248                if bit_offset > 60 && word_idx + 1 < result.len() {
249                    result[word_idx + 1] |= mask >> (64 - bit_offset);
250                }
251            }
252        }
253
254        let remainder_start = chunks * 4;
255        for idx in remainder_start..len {
256            if data[idx] < threshold {
257                set_bit(result, idx);
258            }
259        }
260    }
261
262    /// AVX2 filter: i64 == value
263    #[target_feature(enable = "avx2")]
264    pub unsafe fn filter_i64_eq_avx2(data: &[i64], target: i64, result: &mut FilterBitmap) {
265        let target_vec = _mm256_set1_epi64x(target);
266        let len = data.len();
267        let chunks = len / 4;
268
269        for chunk_idx in 0..chunks {
270            let offset = chunk_idx * 4;
271            let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
272
273            let cmp = _mm256_cmpeq_epi64(data_vec, target_vec);
274            let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
275
276            let word_idx = offset / 64;
277            let bit_offset = offset % 64;
278            if word_idx < result.len() {
279                result[word_idx] |= mask << bit_offset;
280                if bit_offset > 60 && word_idx + 1 < result.len() {
281                    result[word_idx + 1] |= mask >> (64 - bit_offset);
282                }
283            }
284        }
285
286        let remainder_start = chunks * 4;
287        for idx in remainder_start..len {
288            if data[idx] == target {
289                set_bit(result, idx);
290            }
291        }
292    }
293
294    /// AVX2 filter: i64 between low and high
295    #[target_feature(enable = "avx2")]
296    pub unsafe fn filter_i64_between_avx2(
297        data: &[i64],
298        low: i64,
299        high: i64,
300        result: &mut FilterBitmap,
301    ) {
302        let low_vec = _mm256_set1_epi64x(low - 1); // For >= comparison
303        let high_vec = _mm256_set1_epi64x(high);
304        let len = data.len();
305        let chunks = len / 4;
306
307        for chunk_idx in 0..chunks {
308            let offset = chunk_idx * 4;
309            let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
310
311            // data > (low - 1) AND data <= high
312            let cmp_low = _mm256_cmpgt_epi64(data_vec, low_vec);
313            let cmp_high = _mm256_cmpgt_epi64(high_vec, data_vec);
314            let cmp_high_eq = _mm256_cmpeq_epi64(data_vec, high_vec);
315            let cmp_high_final = _mm256_or_si256(cmp_high, cmp_high_eq);
316            let cmp = _mm256_and_si256(cmp_low, cmp_high_final);
317
318            let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
319
320            let word_idx = offset / 64;
321            let bit_offset = offset % 64;
322            if word_idx < result.len() {
323                result[word_idx] |= mask << bit_offset;
324                if bit_offset > 60 && word_idx + 1 < result.len() {
325                    result[word_idx + 1] |= mask >> (64 - bit_offset);
326                }
327            }
328        }
329
330        let remainder_start = chunks * 4;
331        for idx in remainder_start..len {
332            let v = data[idx];
333            if v >= low && v <= high {
334                set_bit(result, idx);
335            }
336        }
337    }
338
339    /// AVX2 filter: f64 > threshold
340    #[target_feature(enable = "avx2")]
341    pub unsafe fn filter_f64_gt_avx2(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
342        let threshold_vec = _mm256_set1_pd(threshold);
343        let len = data.len();
344        let chunks = len / 4;
345
346        for chunk_idx in 0..chunks {
347            let offset = chunk_idx * 4;
348            let data_vec = _mm256_loadu_pd(data.as_ptr().add(offset));
349
350            let cmp = _mm256_cmp_pd(data_vec, threshold_vec, _CMP_GT_OQ);
351            let mask = _mm256_movemask_pd(cmp) as u64;
352
353            let word_idx = offset / 64;
354            let bit_offset = offset % 64;
355            if word_idx < result.len() {
356                result[word_idx] |= mask << bit_offset;
357                if bit_offset > 60 && word_idx + 1 < result.len() {
358                    result[word_idx + 1] |= mask >> (64 - bit_offset);
359                }
360            }
361        }
362
363        let remainder_start = chunks * 4;
364        for idx in remainder_start..len {
365            if data[idx] > threshold {
366                set_bit(result, idx);
367            }
368        }
369    }
370}
371
372// =============================================================================
373// NEON SIMD Implementations (aarch64)
374// =============================================================================
375
#[cfg(target_arch = "aarch64")]
mod neon {
    //! 128-bit NEON kernels. Each vector holds two 64-bit lanes, so every
    //! chunk produces a 2-bit mask at an even `offset`; `offset % 64` is at
    //! most 62, so the 2 mask bits never straddle a 64-bit word of the
    //! bitmap — hence no cross-word carry handling below.
    use super::*;
    use std::arch::aarch64::*;

    /// NEON is always available on aarch64
    #[inline]
    #[allow(dead_code)]
    pub fn is_available() -> bool {
        true
    }

    /// NEON filter: i64 > threshold
    ///
    /// Processes 2 i64 values per iteration using 128-bit vectors.
    ///
    /// # Safety
    ///
    /// Requires NEON; NEON is baseline on aarch64, so this is always safe
    /// to call on this architecture.
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_i64_gt_neon(data: &[i64], threshold: i64, result: &mut FilterBitmap) { unsafe {
        let threshold_vec = vdupq_n_s64(threshold);
        let len = data.len();
        let chunks = len / 2;

        for chunk_idx in 0..chunks {
            let offset = chunk_idx * 2;
            // In-bounds: offset + 2 <= len by construction of `chunks`.
            let data_vec = vld1q_s64(data.as_ptr().add(offset));

            // Compare greater than (returns uint64x2_t)
            let cmp = vcgtq_s64(data_vec, threshold_vec);

            // Extract mask (2 bits) - cmp is already uint64x2_t
            let mask_low = vgetq_lane_u64(cmp, 0);
            let mask_high = vgetq_lane_u64(cmp, 1);
            let mask = ((mask_low != 0) as u64) | (((mask_high != 0) as u64) << 1);

            let word_idx = offset / 64;
            let bit_offset = offset % 64;
            if word_idx < result.len() {
                result[word_idx] |= mask << bit_offset;
            }
        }

        // Handle remainder
        let remainder_start = chunks * 2;
        for idx in remainder_start..len {
            if data[idx] > threshold {
                set_bit(result, idx);
            }
        }
    }}

    /// NEON filter: i64 < threshold
    ///
    /// # Safety
    ///
    /// Requires NEON; always available on aarch64.
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_i64_lt_neon(data: &[i64], threshold: i64, result: &mut FilterBitmap) { unsafe {
        let threshold_vec = vdupq_n_s64(threshold);
        let len = data.len();
        let chunks = len / 2;

        for chunk_idx in 0..chunks {
            let offset = chunk_idx * 2;
            let data_vec = vld1q_s64(data.as_ptr().add(offset));

            let cmp = vcltq_s64(data_vec, threshold_vec);

            // cmp is already uint64x2_t
            let mask_low = vgetq_lane_u64(cmp, 0);
            let mask_high = vgetq_lane_u64(cmp, 1);
            let mask = ((mask_low != 0) as u64) | (((mask_high != 0) as u64) << 1);

            let word_idx = offset / 64;
            let bit_offset = offset % 64;
            if word_idx < result.len() {
                result[word_idx] |= mask << bit_offset;
            }
        }

        let remainder_start = chunks * 2;
        for idx in remainder_start..len {
            if data[idx] < threshold {
                set_bit(result, idx);
            }
        }
    }}

    /// NEON filter: i64 == value
    ///
    /// # Safety
    ///
    /// Requires NEON; always available on aarch64.
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_i64_eq_neon(data: &[i64], target: i64, result: &mut FilterBitmap) { unsafe {
        let target_vec = vdupq_n_s64(target);
        let len = data.len();
        let chunks = len / 2;

        for chunk_idx in 0..chunks {
            let offset = chunk_idx * 2;
            let data_vec = vld1q_s64(data.as_ptr().add(offset));

            let cmp = vceqq_s64(data_vec, target_vec);

            // cmp is already uint64x2_t
            let mask_low = vgetq_lane_u64(cmp, 0);
            let mask_high = vgetq_lane_u64(cmp, 1);
            let mask = ((mask_low != 0) as u64) | (((mask_high != 0) as u64) << 1);

            let word_idx = offset / 64;
            let bit_offset = offset % 64;
            if word_idx < result.len() {
                result[word_idx] |= mask << bit_offset;
            }
        }

        let remainder_start = chunks * 2;
        for idx in remainder_start..len {
            if data[idx] == target {
                set_bit(result, idx);
            }
        }
    }}

    /// NEON filter: f64 > threshold
    ///
    /// NaN lanes compare false under `vcgtq_f64`, so their bits stay clear.
    ///
    /// # Safety
    ///
    /// Requires NEON; always available on aarch64.
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_f64_gt_neon(data: &[f64], threshold: f64, result: &mut FilterBitmap) { unsafe {
        let threshold_vec = vdupq_n_f64(threshold);
        let len = data.len();
        let chunks = len / 2;

        for chunk_idx in 0..chunks {
            let offset = chunk_idx * 2;
            let data_vec = vld1q_f64(data.as_ptr().add(offset));

            let cmp = vcgtq_f64(data_vec, threshold_vec);

            let mask_low = vgetq_lane_u64(cmp, 0);
            let mask_high = vgetq_lane_u64(cmp, 1);
            let mask = ((mask_low != 0) as u64) | (((mask_high != 0) as u64) << 1);

            let word_idx = offset / 64;
            let bit_offset = offset % 64;
            if word_idx < result.len() {
                result[word_idx] |= mask << bit_offset;
            }
        }

        let remainder_start = chunks * 2;
        for idx in remainder_start..len {
            if data[idx] > threshold {
                set_bit(result, idx);
            }
        }
    }}
}
523
524// =============================================================================
525// Public API with Automatic Dispatch
526// =============================================================================
527
528/// Filter i64 column: value > threshold
529pub fn filter_i64_gt(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
530    #[cfg(target_arch = "x86_64")]
531    {
532        if avx2::is_available() {
533            unsafe {
534                avx2::filter_i64_gt_avx2(data, threshold, result);
535            }
536            return;
537        }
538    }
539
540    #[cfg(target_arch = "aarch64")]
541    {
542        unsafe {
543            neon::filter_i64_gt_neon(data, threshold, result);
544        }
545        return;
546    }
547
548    #[allow(unreachable_code)]
549    filter_i64_gt_scalar(data, threshold, result);
550}
551
552/// Filter i64 column: value < threshold
553pub fn filter_i64_lt(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
554    #[cfg(target_arch = "x86_64")]
555    {
556        if avx2::is_available() {
557            unsafe {
558                avx2::filter_i64_lt_avx2(data, threshold, result);
559            }
560            return;
561        }
562    }
563
564    #[cfg(target_arch = "aarch64")]
565    {
566        unsafe {
567            neon::filter_i64_lt_neon(data, threshold, result);
568        }
569        return;
570    }
571
572    #[allow(unreachable_code)]
573    filter_i64_lt_scalar(data, threshold, result);
574}
575
576/// Filter i64 column: value == target
577pub fn filter_i64_eq(data: &[i64], target: i64, result: &mut FilterBitmap) {
578    #[cfg(target_arch = "x86_64")]
579    {
580        if avx2::is_available() {
581            unsafe {
582                avx2::filter_i64_eq_avx2(data, target, result);
583            }
584            return;
585        }
586    }
587
588    #[cfg(target_arch = "aarch64")]
589    {
590        unsafe {
591            neon::filter_i64_eq_neon(data, target, result);
592        }
593        return;
594    }
595
596    #[allow(unreachable_code)]
597    filter_i64_eq_scalar(data, target, result);
598}
599
600/// Filter i64 column: low <= value <= high
601pub fn filter_i64_between(data: &[i64], low: i64, high: i64, result: &mut FilterBitmap) {
602    #[cfg(target_arch = "x86_64")]
603    {
604        if avx2::is_available() {
605            unsafe {
606                avx2::filter_i64_between_avx2(data, low, high, result);
607            }
608            return;
609        }
610    }
611
612    // Fallback to scalar
613    filter_i64_between_scalar(data, low, high, result);
614}
615
616/// Filter f64 column: value > threshold
617pub fn filter_f64_gt(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
618    #[cfg(target_arch = "x86_64")]
619    {
620        if avx2::is_available() {
621            unsafe {
622                avx2::filter_f64_gt_avx2(data, threshold, result);
623            }
624            return;
625        }
626    }
627
628    #[cfg(target_arch = "aarch64")]
629    {
630        unsafe {
631            neon::filter_f64_gt_neon(data, threshold, result);
632        }
633        return;
634    }
635
636    #[allow(unreachable_code)]
637    filter_f64_gt_scalar(data, threshold, result);
638}
639
640/// Get information about SIMD support
641pub fn simd_info() -> SimdInfo {
642    SimdInfo {
643        #[cfg(target_arch = "x86_64")]
644        has_avx2: is_x86_feature_detected!("avx2"),
645        #[cfg(target_arch = "x86_64")]
646        has_avx512f: is_x86_feature_detected!("avx512f"),
647        #[cfg(not(target_arch = "x86_64"))]
648        has_avx2: false,
649        #[cfg(not(target_arch = "x86_64"))]
650        has_avx512f: false,
651        #[cfg(target_arch = "aarch64")]
652        has_neon: true,
653        #[cfg(not(target_arch = "aarch64"))]
654        has_neon: false,
655    }
656}
657
/// SIMD capability information
#[derive(Debug, Clone)]
pub struct SimdInfo {
    /// AVX2 (256-bit) detected on this CPU.
    pub has_avx2: bool,
    /// AVX-512 Foundation detected on this CPU.
    pub has_avx512f: bool,
    /// NEON available (always true on aarch64 builds).
    pub has_neon: bool,
}

impl SimdInfo {
    /// Get expected speedup factor for i64 filters
    ///
    /// Ranked by best capability present: AVX-512 beats AVX2 beats NEON.
    pub fn expected_speedup_i64(&self) -> f64 {
        match (self.has_avx512f, self.has_avx2, self.has_neon) {
            (true, _, _) => 8.0,
            (false, true, _) => 4.0,
            (false, false, true) => 2.0,
            (false, false, false) => 1.0,
        }
    }
}
680
#[cfg(test)]
mod tests {
    //! These tests call the public dispatch wrappers, so the same
    //! assertions exercise whichever backend (SIMD or scalar) the
    //! host CPU selects at runtime.
    use super::*;

    #[test]
    fn test_filter_i64_gt() {
        let data: Vec<i64> = (0..100).collect();
        let mut result = allocate_bitmap(data.len());

        filter_i64_gt(&data, 50, &mut result);

        // Values 51-99 should pass (49 values)
        assert_eq!(popcount(&result), 49);

        // Verify each bit individually, not just the total count.
        for i in 0..100 {
            assert_eq!(get_bit(&result, i), i > 50, "Failed at index {}", i);
        }
    }

    #[test]
    fn test_filter_i64_lt() {
        let data: Vec<i64> = (0..100).collect();
        let mut result = allocate_bitmap(data.len());

        filter_i64_lt(&data, 50, &mut result);

        // Values 0-49 should pass (50 values)
        assert_eq!(popcount(&result), 50);

        for i in 0..100 {
            assert_eq!(get_bit(&result, i), i < 50, "Failed at index {}", i);
        }
    }

    #[test]
    fn test_filter_i64_eq() {
        let data: Vec<i64> = (0..100).collect();
        let mut result = allocate_bitmap(data.len());

        filter_i64_eq(&data, 42, &mut result);

        // Exactly one match expected, at row 42.
        assert_eq!(popcount(&result), 1);
        assert!(get_bit(&result, 42));
    }

    #[test]
    fn test_filter_i64_between() {
        let data: Vec<i64> = (0..100).collect();
        let mut result = allocate_bitmap(data.len());

        filter_i64_between(&data, 25, 75, &mut result);

        // Values 25-75 inclusive (51 values)
        assert_eq!(popcount(&result), 51);

        for i in 0..100 {
            assert_eq!(
                get_bit(&result, i),
                i >= 25 && i <= 75,
                "Failed at index {}",
                i
            );
        }
    }

    #[test]
    fn test_filter_f64_gt() {
        let data: Vec<f64> = (0..100).map(|x| x as f64).collect();
        let mut result = allocate_bitmap(data.len());

        filter_f64_gt(&data, 50.0, &mut result);

        // 51.0 through 99.0 pass (49 values).
        assert_eq!(popcount(&result), 49);
    }

    #[test]
    fn test_bitmap_operations() {
        let mut a = allocate_bitmap(64);
        let mut b = allocate_bitmap(64);

        // a = bits 0-31, b = bits 16-47; overlap is 16-31.
        for i in 0..32 {
            set_bit(&mut a, i);
        }
        for i in 16..48 {
            set_bit(&mut b, i);
        }

        let and_result = bitmap_and(&a, &b);
        assert_eq!(popcount(&and_result), 16); // 16-31

        let or_result = bitmap_or(&a, &b);
        assert_eq!(popcount(&or_result), 48); // 0-47
    }

    #[test]
    fn test_simd_info() {
        // Smoke test: detection must not panic; output is informational only.
        let info = simd_info();
        println!("SIMD capabilities: {:?}", info);
        println!("Expected speedup: {}x", info.expected_speedup_i64());
    }

    #[test]
    fn test_large_dataset() {
        // Test with 1M rows
        let data: Vec<i64> = (0..1_000_000).collect();
        let mut result = allocate_bitmap(data.len());

        // Timing is printed for reference only; no assertion on elapsed time.
        let start = std::time::Instant::now();
        filter_i64_gt(&data, 500_000, &mut result);
        let elapsed = start.elapsed();

        // Values 500_001..=999_999 pass (499_999 values).
        assert_eq!(popcount(&result), 499_999);
        println!("Filtered 1M rows in {:?}", elapsed);
    }
}