// sochdb_query/simd_filter.rs
// SPDX-License-Identifier: AGPL-3.0-or-later
// SochDB - LLM-Optimized Embedded Database
// Copyright (C) 2026 Sushanth Reddy Vanagala (https://github.com/sushanthpy)
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

//! SIMD Vectorized Query Filters
//!
//! From mm.md Task 5.3: AVX-512/NEON Vectorized Filters (VQE)
//!
//! ## Problem
//!
//! Current filtering is scalar. LLM context queries often filter millions of rows
//! (e.g., "events from last 7 days"). SIMD can evaluate 8-16 predicates per instruction.
//!
//! ## Solution
//!
//! Column-oriented data layout + compiled filter expressions + runtime SIMD feature detection
//!
//! ## Throughput Analysis
//!
//! ```text
//! Scalar: 1 comparison/cycle × 3GHz = 3B comparisons/sec
//! AVX-512: 8 comparisons/instruction × ~1 CPI × 3GHz = 24B/sec
//! AVX2 (256-bit): 4 comparisons/instruction = 12B/sec
//! NEON (128-bit): 2 comparisons/instruction for 64-bit lanes = 6B/sec
//!
//! 100M rows @ 24B/sec = 4.2ms (AVX-512)
//! 100M rows @ 3B/sec = 33ms (scalar)
//!
//! Speedup: 8× (AVX-512), 4× (AVX2), 2× (NEON, 64-bit lanes)
//! ```

/// Result bitmap — one bit per row indicating pass/fail (1 = pass).
///
/// Rows are packed into `u64` words, least-significant bit first:
/// row `i` lives at word `i / 64`, bit `i % 64`.
pub type FilterBitmap = Vec<u64>;

/// Filter comparison operation.
///
/// NOTE(review): no code in this file consumes these variants — presumably a
/// query-compilation layer elsewhere in the crate maps predicates onto them;
/// confirm against the rest of the crate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FilterOp {
    /// value == operand
    Equal,
    /// value != operand
    NotEqual,
    /// value < operand
    LessThan,
    /// value <= operand
    LessEqual,
    /// value > operand
    GreaterThan,
    /// value >= operand
    GreaterEqual,
    /// value is NULL
    IsNull,
    /// value is not NULL
    IsNotNull,
}

62/// Allocate a bitmap for the given number of rows
63#[inline]
64pub fn allocate_bitmap(num_rows: usize) -> FilterBitmap {
65    vec![0u64; (num_rows + 63) / 64]
66}
67
68/// Set a bit in the bitmap
69#[inline]
70pub fn set_bit(bitmap: &mut FilterBitmap, idx: usize) {
71    let word_idx = idx / 64;
72    let bit_idx = idx % 64;
73    if word_idx < bitmap.len() {
74        bitmap[word_idx] |= 1u64 << bit_idx;
75    }
76}
77
78/// Check if a bit is set
79#[inline]
80pub fn get_bit(bitmap: &FilterBitmap, idx: usize) -> bool {
81    let word_idx = idx / 64;
82    let bit_idx = idx % 64;
83    if word_idx < bitmap.len() {
84        (bitmap[word_idx] >> bit_idx) & 1 == 1
85    } else {
86        false
87    }
88}
89
90/// Count set bits in bitmap
91pub fn popcount(bitmap: &FilterBitmap) -> usize {
92    bitmap.iter().map(|w| w.count_ones() as usize).sum()
93}
94
95/// AND two bitmaps together
96pub fn bitmap_and(a: &FilterBitmap, b: &FilterBitmap) -> FilterBitmap {
97    a.iter().zip(b.iter()).map(|(x, y)| x & y).collect()
98}
99
100/// OR two bitmaps together
101pub fn bitmap_or(a: &FilterBitmap, b: &FilterBitmap) -> FilterBitmap {
102    a.iter().zip(b.iter()).map(|(x, y)| x | y).collect()
103}
104
/// Bitwise NOT of a bitmap (complement of the row selection).
///
/// NOTE(review): this also flips the unused padding bits in the final word
/// (bit positions beyond the original row count), so `popcount` on the
/// result can over-count unless the caller masks the tail — confirm call
/// sites handle this.
pub fn bitmap_not(a: &FilterBitmap) -> FilterBitmap {
    a.iter().map(|x| !x).collect()
}

// =============================================================================
// Scalar Implementations (Fallback)
// =============================================================================

114/// Scalar filter: i64 > threshold
115pub fn filter_i64_gt_scalar(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
116    for (idx, &value) in data.iter().enumerate() {
117        if value > threshold {
118            set_bit(result, idx);
119        }
120    }
121}
122
123/// Scalar filter: i64 >= threshold
124pub fn filter_i64_ge_scalar(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
125    for (idx, &value) in data.iter().enumerate() {
126        if value >= threshold {
127            set_bit(result, idx);
128        }
129    }
130}
131
132/// Scalar filter: i64 < threshold
133pub fn filter_i64_lt_scalar(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
134    for (idx, &value) in data.iter().enumerate() {
135        if value < threshold {
136            set_bit(result, idx);
137        }
138    }
139}
140
141/// Scalar filter: i64 == value
142pub fn filter_i64_eq_scalar(data: &[i64], target: i64, result: &mut FilterBitmap) {
143    for (idx, &value) in data.iter().enumerate() {
144        if value == target {
145            set_bit(result, idx);
146        }
147    }
148}
149
150/// Scalar filter: i64 between low and high (inclusive)
151pub fn filter_i64_between_scalar(data: &[i64], low: i64, high: i64, result: &mut FilterBitmap) {
152    for (idx, &value) in data.iter().enumerate() {
153        if value >= low && value <= high {
154            set_bit(result, idx);
155        }
156    }
157}
158
159/// Scalar filter: f64 > threshold
160pub fn filter_f64_gt_scalar(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
161    for (idx, &value) in data.iter().enumerate() {
162        if value > threshold {
163            set_bit(result, idx);
164        }
165    }
166}
167
168/// Scalar filter: f64 < threshold
169pub fn filter_f64_lt_scalar(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
170    for (idx, &value) in data.iter().enumerate() {
171        if value < threshold {
172            set_bit(result, idx);
173        }
174    }
175}

// =============================================================================
// AVX2 SIMD Implementations (x86_64)
// =============================================================================

181#[cfg(target_arch = "x86_64")]
182mod avx2 {
183    use super::*;
184    use std::arch::x86_64::*;
185
186    /// Check if AVX2 is available
187    #[inline]
188    pub fn is_available() -> bool {
189        is_x86_feature_detected!("avx2")
190    }
191
192    /// AVX2 filter: i64 > threshold
193    ///
194    /// Processes 4 i64 values per iteration using 256-bit vectors.
195    #[target_feature(enable = "avx2")]
196    pub unsafe fn filter_i64_gt_avx2(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
197        let threshold_vec = _mm256_set1_epi64x(threshold);
198        let len = data.len();
199        let chunks = len / 4;
200
201        for chunk_idx in 0..chunks {
202            let offset = chunk_idx * 4;
203            let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
204
205            // Compare greater than
206            let cmp = _mm256_cmpgt_epi64(data_vec, threshold_vec);
207
208            // Extract mask
209            let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
210
211            // Set bits in result
212            let word_idx = offset / 64;
213            let bit_offset = offset % 64;
214            if word_idx < result.len() {
215                result[word_idx] |= mask << bit_offset;
216                // Handle overflow to next word
217                if bit_offset > 60 && word_idx + 1 < result.len() {
218                    result[word_idx + 1] |= mask >> (64 - bit_offset);
219                }
220            }
221        }
222
223        // Handle remainder with scalar
224        let remainder_start = chunks * 4;
225        for idx in remainder_start..len {
226            if data[idx] > threshold {
227                set_bit(result, idx);
228            }
229        }
230    }
231
232    /// AVX2 filter: i64 < threshold
233    #[target_feature(enable = "avx2")]
234    pub unsafe fn filter_i64_lt_avx2(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
235        let threshold_vec = _mm256_set1_epi64x(threshold);
236        let len = data.len();
237        let chunks = len / 4;
238
239        for chunk_idx in 0..chunks {
240            let offset = chunk_idx * 4;
241            let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
242
243            // Compare: data < threshold is equivalent to threshold > data
244            let cmp = _mm256_cmpgt_epi64(threshold_vec, data_vec);
245            let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
246
247            let word_idx = offset / 64;
248            let bit_offset = offset % 64;
249            if word_idx < result.len() {
250                result[word_idx] |= mask << bit_offset;
251                if bit_offset > 60 && word_idx + 1 < result.len() {
252                    result[word_idx + 1] |= mask >> (64 - bit_offset);
253                }
254            }
255        }
256
257        let remainder_start = chunks * 4;
258        for idx in remainder_start..len {
259            if data[idx] < threshold {
260                set_bit(result, idx);
261            }
262        }
263    }
264
265    /// AVX2 filter: i64 == value
266    #[target_feature(enable = "avx2")]
267    pub unsafe fn filter_i64_eq_avx2(data: &[i64], target: i64, result: &mut FilterBitmap) {
268        let target_vec = _mm256_set1_epi64x(target);
269        let len = data.len();
270        let chunks = len / 4;
271
272        for chunk_idx in 0..chunks {
273            let offset = chunk_idx * 4;
274            let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
275
276            let cmp = _mm256_cmpeq_epi64(data_vec, target_vec);
277            let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
278
279            let word_idx = offset / 64;
280            let bit_offset = offset % 64;
281            if word_idx < result.len() {
282                result[word_idx] |= mask << bit_offset;
283                if bit_offset > 60 && word_idx + 1 < result.len() {
284                    result[word_idx + 1] |= mask >> (64 - bit_offset);
285                }
286            }
287        }
288
289        let remainder_start = chunks * 4;
290        for idx in remainder_start..len {
291            if data[idx] == target {
292                set_bit(result, idx);
293            }
294        }
295    }
296
297    /// AVX2 filter: i64 between low and high
298    #[target_feature(enable = "avx2")]
299    pub unsafe fn filter_i64_between_avx2(
300        data: &[i64],
301        low: i64,
302        high: i64,
303        result: &mut FilterBitmap,
304    ) {
305        let low_vec = _mm256_set1_epi64x(low - 1); // For >= comparison
306        let high_vec = _mm256_set1_epi64x(high);
307        let len = data.len();
308        let chunks = len / 4;
309
310        for chunk_idx in 0..chunks {
311            let offset = chunk_idx * 4;
312            let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
313
314            // data > (low - 1) AND data <= high
315            let cmp_low = _mm256_cmpgt_epi64(data_vec, low_vec);
316            let cmp_high = _mm256_cmpgt_epi64(high_vec, data_vec);
317            let cmp_high_eq = _mm256_cmpeq_epi64(data_vec, high_vec);
318            let cmp_high_final = _mm256_or_si256(cmp_high, cmp_high_eq);
319            let cmp = _mm256_and_si256(cmp_low, cmp_high_final);
320
321            let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
322
323            let word_idx = offset / 64;
324            let bit_offset = offset % 64;
325            if word_idx < result.len() {
326                result[word_idx] |= mask << bit_offset;
327                if bit_offset > 60 && word_idx + 1 < result.len() {
328                    result[word_idx + 1] |= mask >> (64 - bit_offset);
329                }
330            }
331        }
332
333        let remainder_start = chunks * 4;
334        for idx in remainder_start..len {
335            let v = data[idx];
336            if v >= low && v <= high {
337                set_bit(result, idx);
338            }
339        }
340    }
341
342    /// AVX2 filter: f64 > threshold
343    #[target_feature(enable = "avx2")]
344    pub unsafe fn filter_f64_gt_avx2(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
345        let threshold_vec = _mm256_set1_pd(threshold);
346        let len = data.len();
347        let chunks = len / 4;
348
349        for chunk_idx in 0..chunks {
350            let offset = chunk_idx * 4;
351            let data_vec = _mm256_loadu_pd(data.as_ptr().add(offset));
352
353            let cmp = _mm256_cmp_pd(data_vec, threshold_vec, _CMP_GT_OQ);
354            let mask = _mm256_movemask_pd(cmp) as u64;
355
356            let word_idx = offset / 64;
357            let bit_offset = offset % 64;
358            if word_idx < result.len() {
359                result[word_idx] |= mask << bit_offset;
360                if bit_offset > 60 && word_idx + 1 < result.len() {
361                    result[word_idx + 1] |= mask >> (64 - bit_offset);
362                }
363            }
364        }
365
366        let remainder_start = chunks * 4;
367        for idx in remainder_start..len {
368            if data[idx] > threshold {
369                set_bit(result, idx);
370            }
371        }
372    }
373}

// =============================================================================
// NEON SIMD Implementations (aarch64)
// =============================================================================

#[cfg(target_arch = "aarch64")]
mod neon {
    use super::*;
    use std::arch::aarch64::*;

    /// NEON is a baseline feature of aarch64, so it is always available.
    ///
    /// Kept for API symmetry with the `avx2` module; the aarch64 dispatch
    /// paths call the kernels directly without checking, hence `dead_code`.
    #[inline]
    #[allow(dead_code)]
    pub fn is_available() -> bool {
        true
    }

    /// NEON filter: i64 > threshold
    ///
    /// Processes 2 i64 values per iteration using 128-bit vectors; the
    /// trailing element (when `len` is odd) is handled by scalar code.
    ///
    /// # Safety
    /// NEON is baseline on aarch64; the `unsafe` stems from the
    /// `#[target_feature]` attribute, and loads stay within `data`'s bounds.
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_i64_gt_neon(data: &[i64], threshold: i64, result: &mut FilterBitmap) { unsafe {
        let threshold_vec = vdupq_n_s64(threshold);
        let len = data.len();
        let chunks = len / 2;

        for chunk_idx in 0..chunks {
            let offset = chunk_idx * 2;
            // In bounds: offset + 1 < len because chunk_idx < len / 2.
            let data_vec = vld1q_s64(data.as_ptr().add(offset));

            // Compare greater than; each lane becomes all-ones/all-zeros (uint64x2_t)
            let cmp = vcgtq_s64(data_vec, threshold_vec);

            // Collapse the two 64-bit lane masks into a 2-bit mask.
            let mask_low = vgetq_lane_u64(cmp, 0);
            let mask_high = vgetq_lane_u64(cmp, 1);
            let mask = ((mask_low != 0) as u64) | (((mask_high != 0) as u64) << 1);

            // offset is even, so bit_offset <= 62 and the 2-bit mask never
            // straddles a word boundary — no cross-word spill needed.
            let word_idx = offset / 64;
            let bit_offset = offset % 64;
            if word_idx < result.len() {
                result[word_idx] |= mask << bit_offset;
            }
        }

        // Handle remainder (at most one element) with scalar code.
        let remainder_start = chunks * 2;
        for idx in remainder_start..len {
            if data[idx] > threshold {
                set_bit(result, idx);
            }
        }
    }}

    /// NEON filter: i64 < threshold
    ///
    /// # Safety
    /// See [`filter_i64_gt_neon`].
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_i64_lt_neon(data: &[i64], threshold: i64, result: &mut FilterBitmap) { unsafe {
        let threshold_vec = vdupq_n_s64(threshold);
        let len = data.len();
        let chunks = len / 2;

        for chunk_idx in 0..chunks {
            let offset = chunk_idx * 2;
            let data_vec = vld1q_s64(data.as_ptr().add(offset));

            let cmp = vcltq_s64(data_vec, threshold_vec);

            // cmp is already uint64x2_t; collapse to a 2-bit mask.
            let mask_low = vgetq_lane_u64(cmp, 0);
            let mask_high = vgetq_lane_u64(cmp, 1);
            let mask = ((mask_low != 0) as u64) | (((mask_high != 0) as u64) << 1);

            let word_idx = offset / 64;
            let bit_offset = offset % 64;
            if word_idx < result.len() {
                result[word_idx] |= mask << bit_offset;
            }
        }

        let remainder_start = chunks * 2;
        for idx in remainder_start..len {
            if data[idx] < threshold {
                set_bit(result, idx);
            }
        }
    }}

    /// NEON filter: i64 == value
    ///
    /// # Safety
    /// See [`filter_i64_gt_neon`].
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_i64_eq_neon(data: &[i64], target: i64, result: &mut FilterBitmap) { unsafe {
        let target_vec = vdupq_n_s64(target);
        let len = data.len();
        let chunks = len / 2;

        for chunk_idx in 0..chunks {
            let offset = chunk_idx * 2;
            let data_vec = vld1q_s64(data.as_ptr().add(offset));

            let cmp = vceqq_s64(data_vec, target_vec);

            // cmp is already uint64x2_t; collapse to a 2-bit mask.
            let mask_low = vgetq_lane_u64(cmp, 0);
            let mask_high = vgetq_lane_u64(cmp, 1);
            let mask = ((mask_low != 0) as u64) | (((mask_high != 0) as u64) << 1);

            let word_idx = offset / 64;
            let bit_offset = offset % 64;
            if word_idx < result.len() {
                result[word_idx] |= mask << bit_offset;
            }
        }

        let remainder_start = chunks * 2;
        for idx in remainder_start..len {
            if data[idx] == target {
                set_bit(result, idx);
            }
        }
    }}

    /// NEON filter: f64 > threshold
    ///
    /// NaN lanes compare false in `vcgtq_f64`, matching scalar `>` semantics.
    ///
    /// # Safety
    /// See [`filter_i64_gt_neon`].
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_f64_gt_neon(data: &[f64], threshold: f64, result: &mut FilterBitmap) { unsafe {
        let threshold_vec = vdupq_n_f64(threshold);
        let len = data.len();
        let chunks = len / 2;

        for chunk_idx in 0..chunks {
            let offset = chunk_idx * 2;
            let data_vec = vld1q_f64(data.as_ptr().add(offset));

            let cmp = vcgtq_f64(data_vec, threshold_vec);

            let mask_low = vgetq_lane_u64(cmp, 0);
            let mask_high = vgetq_lane_u64(cmp, 1);
            let mask = ((mask_low != 0) as u64) | (((mask_high != 0) as u64) << 1);

            let word_idx = offset / 64;
            let bit_offset = offset % 64;
            if word_idx < result.len() {
                result[word_idx] |= mask << bit_offset;
            }
        }

        let remainder_start = chunks * 2;
        for idx in remainder_start..len {
            if data[idx] > threshold {
                set_bit(result, idx);
            }
        }
    }}
}

// =============================================================================
// Public API with Automatic Dispatch
// =============================================================================

531/// Filter i64 column: value > threshold
532pub fn filter_i64_gt(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
533    #[cfg(target_arch = "x86_64")]
534    {
535        if avx2::is_available() {
536            unsafe {
537                avx2::filter_i64_gt_avx2(data, threshold, result);
538            }
539            return;
540        }
541    }
542
543    #[cfg(target_arch = "aarch64")]
544    {
545        unsafe {
546            neon::filter_i64_gt_neon(data, threshold, result);
547        }
548        return;
549    }
550
551    #[allow(unreachable_code)]
552    filter_i64_gt_scalar(data, threshold, result);
553}
554
555/// Filter i64 column: value < threshold
556pub fn filter_i64_lt(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
557    #[cfg(target_arch = "x86_64")]
558    {
559        if avx2::is_available() {
560            unsafe {
561                avx2::filter_i64_lt_avx2(data, threshold, result);
562            }
563            return;
564        }
565    }
566
567    #[cfg(target_arch = "aarch64")]
568    {
569        unsafe {
570            neon::filter_i64_lt_neon(data, threshold, result);
571        }
572        return;
573    }
574
575    #[allow(unreachable_code)]
576    filter_i64_lt_scalar(data, threshold, result);
577}
578
579/// Filter i64 column: value == target
580pub fn filter_i64_eq(data: &[i64], target: i64, result: &mut FilterBitmap) {
581    #[cfg(target_arch = "x86_64")]
582    {
583        if avx2::is_available() {
584            unsafe {
585                avx2::filter_i64_eq_avx2(data, target, result);
586            }
587            return;
588        }
589    }
590
591    #[cfg(target_arch = "aarch64")]
592    {
593        unsafe {
594            neon::filter_i64_eq_neon(data, target, result);
595        }
596        return;
597    }
598
599    #[allow(unreachable_code)]
600    filter_i64_eq_scalar(data, target, result);
601}
602
603/// Filter i64 column: low <= value <= high
604pub fn filter_i64_between(data: &[i64], low: i64, high: i64, result: &mut FilterBitmap) {
605    #[cfg(target_arch = "x86_64")]
606    {
607        if avx2::is_available() {
608            unsafe {
609                avx2::filter_i64_between_avx2(data, low, high, result);
610            }
611            return;
612        }
613    }
614
615    // Fallback to scalar
616    filter_i64_between_scalar(data, low, high, result);
617}
618
619/// Filter f64 column: value > threshold
620pub fn filter_f64_gt(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
621    #[cfg(target_arch = "x86_64")]
622    {
623        if avx2::is_available() {
624            unsafe {
625                avx2::filter_f64_gt_avx2(data, threshold, result);
626            }
627            return;
628        }
629    }
630
631    #[cfg(target_arch = "aarch64")]
632    {
633        unsafe {
634            neon::filter_f64_gt_neon(data, threshold, result);
635        }
636        return;
637    }
638
639    #[allow(unreachable_code)]
640    filter_f64_gt_scalar(data, threshold, result);
641}
642
643/// Get information about SIMD support
644pub fn simd_info() -> SimdInfo {
645    SimdInfo {
646        #[cfg(target_arch = "x86_64")]
647        has_avx2: is_x86_feature_detected!("avx2"),
648        #[cfg(target_arch = "x86_64")]
649        has_avx512f: is_x86_feature_detected!("avx512f"),
650        #[cfg(not(target_arch = "x86_64"))]
651        has_avx2: false,
652        #[cfg(not(target_arch = "x86_64"))]
653        has_avx512f: false,
654        #[cfg(target_arch = "aarch64")]
655        has_neon: true,
656        #[cfg(not(target_arch = "aarch64"))]
657        has_neon: false,
658    }
659}
660
/// SIMD capability information, as reported by [`simd_info`].
#[derive(Debug, Clone)]
pub struct SimdInfo {
    /// Runtime-detected AVX2 support (always false off x86_64).
    pub has_avx2: bool,
    /// Runtime-detected AVX-512F support (always false off x86_64).
    pub has_avx512f: bool,
    /// True on aarch64, where NEON is a baseline feature.
    pub has_neon: bool,
}

669impl SimdInfo {
670    /// Get expected speedup factor for i64 filters
671    pub fn expected_speedup_i64(&self) -> f64 {
672        if self.has_avx512f {
673            8.0
674        } else if self.has_avx2 {
675            4.0
676        } else if self.has_neon {
677            2.0
678        } else {
679            1.0
680        }
681    }
682}
683
#[cfg(test)]
mod tests {
    use super::*;

    // These tests go through the public dispatch functions, so on
    // SIMD-capable hosts they exercise the vectorized kernels (including
    // the scalar remainder path, since 100 is not a multiple of the lane
    // count) and elsewhere the scalar fallback.

    #[test]
    fn test_filter_i64_gt() {
        let data: Vec<i64> = (0..100).collect();
        let mut result = allocate_bitmap(data.len());

        filter_i64_gt(&data, 50, &mut result);

        // Values 51-99 should pass (49 values)
        assert_eq!(popcount(&result), 49);

        for i in 0..100 {
            assert_eq!(get_bit(&result, i), i > 50, "Failed at index {}", i);
        }
    }

    #[test]
    fn test_filter_i64_lt() {
        let data: Vec<i64> = (0..100).collect();
        let mut result = allocate_bitmap(data.len());

        filter_i64_lt(&data, 50, &mut result);

        // Values 0-49 should pass (50 values)
        assert_eq!(popcount(&result), 50);

        for i in 0..100 {
            assert_eq!(get_bit(&result, i), i < 50, "Failed at index {}", i);
        }
    }

    #[test]
    fn test_filter_i64_eq() {
        let data: Vec<i64> = (0..100).collect();
        let mut result = allocate_bitmap(data.len());

        filter_i64_eq(&data, 42, &mut result);

        // Exactly one row matches.
        assert_eq!(popcount(&result), 1);
        assert!(get_bit(&result, 42));
    }

    #[test]
    fn test_filter_i64_between() {
        let data: Vec<i64> = (0..100).collect();
        let mut result = allocate_bitmap(data.len());

        filter_i64_between(&data, 25, 75, &mut result);

        // Values 25-75 inclusive (51 values)
        assert_eq!(popcount(&result), 51);

        for i in 0..100 {
            assert_eq!(
                get_bit(&result, i),
                i >= 25 && i <= 75,
                "Failed at index {}",
                i
            );
        }
    }

    #[test]
    fn test_filter_f64_gt() {
        let data: Vec<f64> = (0..100).map(|x| x as f64).collect();
        let mut result = allocate_bitmap(data.len());

        filter_f64_gt(&data, 50.0, &mut result);

        // Values 51.0-99.0 should pass (49 values).
        assert_eq!(popcount(&result), 49);
    }

    #[test]
    fn test_bitmap_operations() {
        let mut a = allocate_bitmap(64);
        let mut b = allocate_bitmap(64);

        for i in 0..32 {
            set_bit(&mut a, i);
        }
        for i in 16..48 {
            set_bit(&mut b, i);
        }

        let and_result = bitmap_and(&a, &b);
        assert_eq!(popcount(&and_result), 16); // 16-31

        let or_result = bitmap_or(&a, &b);
        assert_eq!(popcount(&or_result), 48); // 0-47
    }

    #[test]
    fn test_simd_info() {
        // Smoke test: only prints what runtime detection reports.
        let info = simd_info();
        println!("SIMD capabilities: {:?}", info);
        println!("Expected speedup: {}x", info.expected_speedup_i64());
    }

    #[test]
    fn test_large_dataset() {
        // Test with 1M rows
        let data: Vec<i64> = (0..1_000_000).collect();
        let mut result = allocate_bitmap(data.len());

        let start = std::time::Instant::now();
        filter_i64_gt(&data, 500_000, &mut result);
        let elapsed = start.elapsed();

        // 500_001..=999_999 pass.
        assert_eq!(popcount(&result), 499_999);
        println!("Filtered 1M rows in {:?}", elapsed);
    }
}