Skip to main content

sochdb_query/
simd_filter.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// SochDB - LLM-Optimized Embedded Database
3// Copyright (C) 2026 Sushanth Reddy Vanagala (https://github.com/sushanthpy)
4//
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU Affero General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9//
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13// GNU Affero General Public License for more details.
14//
15// You should have received a copy of the GNU Affero General Public License
16// along with this program. If not, see <https://www.gnu.org/licenses/>.
17
18//! SIMD Vectorized Query Filters
19//!
20//! From mm.md Task 5.3: AVX-512/NEON Vectorized Filters (VQE)
21//!
22//! ## Problem
23//!
24//! Current filtering is scalar. LLM context queries often filter millions of rows
25//! (e.g., "events from last 7 days"). SIMD can evaluate 8-16 predicates per instruction.
26//!
27//! ## Solution
28//!
29//! Column-oriented data layout + compiled filter expressions + runtime SIMD feature detection
30//!
31//! ## Throughput Analysis
32//!
//! ```text
//! Scalar:  1 comparison/cycle × 3GHz = 3B comparisons/sec
//! AVX-512: 8 comparisons/instruction × ~1 CPI × 3GHz = 24B/sec
//! AVX2:    4 comparisons/instruction (256-bit vectors) = 12B/sec
//! NEON:    2 comparisons/instruction (128-bit vectors, 64-bit lanes) = 6B/sec
//!
//! 100M rows @ 24B/sec = 4.2ms (AVX-512)
//! 100M rows @ 3B/sec = 33ms (scalar)
//!
//! Speedup: 8× (AVX-512), 4× (AVX2), 2× (NEON)
//! ```
44
/// Result bitmap: one bit per row, packed 64 rows per `u64` word.
///
/// Row `i` is stored in word `i / 64` at bit position `i % 64`. The filter
/// kernels in this file set a row's bit when the row satisfies the predicate.
pub type FilterBitmap = Vec<u64>;
47
/// Filter comparison operation.
///
/// NOTE(review): none of the kernels or dispatchers visible in this file take
/// a `FilterOp`; presumably it is consumed by a higher-level filter compiler
/// defined elsewhere — confirm before relying on it here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FilterOp {
    Equal,
    NotEqual,
    LessThan,
    LessEqual,
    GreaterThan,
    GreaterEqual,
    IsNull,
    IsNotNull,
}
60
61/// Allocate a bitmap for the given number of rows
62#[inline]
63pub fn allocate_bitmap(num_rows: usize) -> FilterBitmap {
64    vec![0u64; (num_rows + 63) / 64]
65}
66
67/// Set a bit in the bitmap
68#[inline]
69pub fn set_bit(bitmap: &mut FilterBitmap, idx: usize) {
70    let word_idx = idx / 64;
71    let bit_idx = idx % 64;
72    if word_idx < bitmap.len() {
73        bitmap[word_idx] |= 1u64 << bit_idx;
74    }
75}
76
77/// Check if a bit is set
78#[inline]
79pub fn get_bit(bitmap: &FilterBitmap, idx: usize) -> bool {
80    let word_idx = idx / 64;
81    let bit_idx = idx % 64;
82    if word_idx < bitmap.len() {
83        (bitmap[word_idx] >> bit_idx) & 1 == 1
84    } else {
85        false
86    }
87}
88
89/// Count set bits in bitmap
90pub fn popcount(bitmap: &FilterBitmap) -> usize {
91    bitmap.iter().map(|w| w.count_ones() as usize).sum()
92}
93
94/// AND two bitmaps together
95pub fn bitmap_and(a: &FilterBitmap, b: &FilterBitmap) -> FilterBitmap {
96    a.iter().zip(b.iter()).map(|(x, y)| x & y).collect()
97}
98
99/// OR two bitmaps together
100pub fn bitmap_or(a: &FilterBitmap, b: &FilterBitmap) -> FilterBitmap {
101    a.iter().zip(b.iter()).map(|(x, y)| x | y).collect()
102}
103
104/// NOT a bitmap
105pub fn bitmap_not(a: &FilterBitmap) -> FilterBitmap {
106    a.iter().map(|x| !x).collect()
107}
108
109// =============================================================================
110// Scalar Implementations (Fallback)
111// =============================================================================
112
113/// Scalar filter: i64 > threshold
114pub fn filter_i64_gt_scalar(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
115    for (idx, &value) in data.iter().enumerate() {
116        if value > threshold {
117            set_bit(result, idx);
118        }
119    }
120}
121
122/// Scalar filter: i64 >= threshold
123pub fn filter_i64_ge_scalar(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
124    for (idx, &value) in data.iter().enumerate() {
125        if value >= threshold {
126            set_bit(result, idx);
127        }
128    }
129}
130
131/// Scalar filter: i64 < threshold
132pub fn filter_i64_lt_scalar(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
133    for (idx, &value) in data.iter().enumerate() {
134        if value < threshold {
135            set_bit(result, idx);
136        }
137    }
138}
139
140/// Scalar filter: i64 == value
141pub fn filter_i64_eq_scalar(data: &[i64], target: i64, result: &mut FilterBitmap) {
142    for (idx, &value) in data.iter().enumerate() {
143        if value == target {
144            set_bit(result, idx);
145        }
146    }
147}
148
149/// Scalar filter: i64 between low and high (inclusive)
150pub fn filter_i64_between_scalar(data: &[i64], low: i64, high: i64, result: &mut FilterBitmap) {
151    for (idx, &value) in data.iter().enumerate() {
152        if value >= low && value <= high {
153            set_bit(result, idx);
154        }
155    }
156}
157
158/// Scalar filter: f64 > threshold
159pub fn filter_f64_gt_scalar(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
160    for (idx, &value) in data.iter().enumerate() {
161        if value > threshold {
162            set_bit(result, idx);
163        }
164    }
165}
166
167/// Scalar filter: f64 < threshold
168pub fn filter_f64_lt_scalar(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
169    for (idx, &value) in data.iter().enumerate() {
170        if value < threshold {
171            set_bit(result, idx);
172        }
173    }
174}
175
176// =============================================================================
177// AVX2 SIMD Implementations (x86_64)
178// =============================================================================
179
180#[cfg(target_arch = "x86_64")]
181mod avx2 {
182    use super::*;
183    use std::arch::x86_64::*;
184
185    /// Check if AVX2 is available
186    #[inline]
187    pub fn is_available() -> bool {
188        is_x86_feature_detected!("avx2")
189    }
190
191    /// AVX2 filter: i64 > threshold
192    ///
193    /// Processes 4 i64 values per iteration using 256-bit vectors.
194    #[target_feature(enable = "avx2")]
195    pub unsafe fn filter_i64_gt_avx2(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
196        let threshold_vec = _mm256_set1_epi64x(threshold);
197        let len = data.len();
198        let chunks = len / 4;
199
200        for chunk_idx in 0..chunks {
201            let offset = chunk_idx * 4;
202            let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
203
204            // Compare greater than
205            let cmp = _mm256_cmpgt_epi64(data_vec, threshold_vec);
206
207            // Extract mask
208            let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
209
210            // Set bits in result
211            let word_idx = offset / 64;
212            let bit_offset = offset % 64;
213            if word_idx < result.len() {
214                result[word_idx] |= mask << bit_offset;
215                // Handle overflow to next word
216                if bit_offset > 60 && word_idx + 1 < result.len() {
217                    result[word_idx + 1] |= mask >> (64 - bit_offset);
218                }
219            }
220        }
221
222        // Handle remainder with scalar
223        let remainder_start = chunks * 4;
224        for idx in remainder_start..len {
225            if data[idx] > threshold {
226                set_bit(result, idx);
227            }
228        }
229    }
230
231    /// AVX2 filter: i64 < threshold
232    #[target_feature(enable = "avx2")]
233    pub unsafe fn filter_i64_lt_avx2(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
234        let threshold_vec = _mm256_set1_epi64x(threshold);
235        let len = data.len();
236        let chunks = len / 4;
237
238        for chunk_idx in 0..chunks {
239            let offset = chunk_idx * 4;
240            let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
241
242            // Compare: data < threshold is equivalent to threshold > data
243            let cmp = _mm256_cmpgt_epi64(threshold_vec, data_vec);
244            let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
245
246            let word_idx = offset / 64;
247            let bit_offset = offset % 64;
248            if word_idx < result.len() {
249                result[word_idx] |= mask << bit_offset;
250                if bit_offset > 60 && word_idx + 1 < result.len() {
251                    result[word_idx + 1] |= mask >> (64 - bit_offset);
252                }
253            }
254        }
255
256        let remainder_start = chunks * 4;
257        for idx in remainder_start..len {
258            if data[idx] < threshold {
259                set_bit(result, idx);
260            }
261        }
262    }
263
264    /// AVX2 filter: i64 == value
265    #[target_feature(enable = "avx2")]
266    pub unsafe fn filter_i64_eq_avx2(data: &[i64], target: i64, result: &mut FilterBitmap) {
267        let target_vec = _mm256_set1_epi64x(target);
268        let len = data.len();
269        let chunks = len / 4;
270
271        for chunk_idx in 0..chunks {
272            let offset = chunk_idx * 4;
273            let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
274
275            let cmp = _mm256_cmpeq_epi64(data_vec, target_vec);
276            let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
277
278            let word_idx = offset / 64;
279            let bit_offset = offset % 64;
280            if word_idx < result.len() {
281                result[word_idx] |= mask << bit_offset;
282                if bit_offset > 60 && word_idx + 1 < result.len() {
283                    result[word_idx + 1] |= mask >> (64 - bit_offset);
284                }
285            }
286        }
287
288        let remainder_start = chunks * 4;
289        for idx in remainder_start..len {
290            if data[idx] == target {
291                set_bit(result, idx);
292            }
293        }
294    }
295
296    /// AVX2 filter: i64 between low and high
297    #[target_feature(enable = "avx2")]
298    pub unsafe fn filter_i64_between_avx2(
299        data: &[i64],
300        low: i64,
301        high: i64,
302        result: &mut FilterBitmap,
303    ) {
304        let low_vec = _mm256_set1_epi64x(low - 1); // For >= comparison
305        let high_vec = _mm256_set1_epi64x(high);
306        let len = data.len();
307        let chunks = len / 4;
308
309        for chunk_idx in 0..chunks {
310            let offset = chunk_idx * 4;
311            let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
312
313            // data > (low - 1) AND data <= high
314            let cmp_low = _mm256_cmpgt_epi64(data_vec, low_vec);
315            let cmp_high = _mm256_cmpgt_epi64(high_vec, data_vec);
316            let cmp_high_eq = _mm256_cmpeq_epi64(data_vec, high_vec);
317            let cmp_high_final = _mm256_or_si256(cmp_high, cmp_high_eq);
318            let cmp = _mm256_and_si256(cmp_low, cmp_high_final);
319
320            let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
321
322            let word_idx = offset / 64;
323            let bit_offset = offset % 64;
324            if word_idx < result.len() {
325                result[word_idx] |= mask << bit_offset;
326                if bit_offset > 60 && word_idx + 1 < result.len() {
327                    result[word_idx + 1] |= mask >> (64 - bit_offset);
328                }
329            }
330        }
331
332        let remainder_start = chunks * 4;
333        for idx in remainder_start..len {
334            let v = data[idx];
335            if v >= low && v <= high {
336                set_bit(result, idx);
337            }
338        }
339    }
340
341    /// AVX2 filter: f64 > threshold
342    #[target_feature(enable = "avx2")]
343    pub unsafe fn filter_f64_gt_avx2(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
344        let threshold_vec = _mm256_set1_pd(threshold);
345        let len = data.len();
346        let chunks = len / 4;
347
348        for chunk_idx in 0..chunks {
349            let offset = chunk_idx * 4;
350            let data_vec = _mm256_loadu_pd(data.as_ptr().add(offset));
351
352            let cmp = _mm256_cmp_pd(data_vec, threshold_vec, _CMP_GT_OQ);
353            let mask = _mm256_movemask_pd(cmp) as u64;
354
355            let word_idx = offset / 64;
356            let bit_offset = offset % 64;
357            if word_idx < result.len() {
358                result[word_idx] |= mask << bit_offset;
359                if bit_offset > 60 && word_idx + 1 < result.len() {
360                    result[word_idx + 1] |= mask >> (64 - bit_offset);
361                }
362            }
363        }
364
365        let remainder_start = chunks * 4;
366        for idx in remainder_start..len {
367            if data[idx] > threshold {
368                set_bit(result, idx);
369            }
370        }
371    }
372}
373
374// =============================================================================
375// NEON SIMD Implementations (aarch64)
376// =============================================================================
377
#[cfg(target_arch = "aarch64")]
mod neon {
    //! NEON kernels: each iteration compares two 64-bit lanes held in one
    //! 128-bit register and ORs the resulting 2-bit mask into the bitmap.
    //! A single leftover row (odd-length input) is handled with the same
    //! scalar comparison.

    use super::*;
    use std::arch::aarch64::*;

    /// NEON is always available on aarch64
    #[inline]
    #[allow(dead_code)] // the dispatchers visible in this file call the NEON kernels unconditionally
    pub fn is_available() -> bool {
        true
    }

    /// NEON filter: i64 > threshold
    ///
    /// Processes 2 i64 values per iteration using 128-bit vectors. Matching
    /// rows are OR-ed into `result`; rows that do not fit in `result` are
    /// skipped.
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_i64_gt_neon(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
        unsafe {
            let bound = vdupq_n_s64(threshold);
            let mut pairs = data.chunks_exact(2);

            for (pair_idx, pair) in pairs.by_ref().enumerate() {
                let row = pair_idx * 2;
                // `pair` is exactly two contiguous i64 values, so the 128-bit
                // load stays inside the slice.
                let hits = vcgtq_s64(vld1q_s64(pair.as_ptr()), bound);

                // Each lane is all-ones or all-zeros; squeeze them to 2 bits.
                let bits = u64::from(vgetq_lane_u64(hits, 0) != 0)
                    | (u64::from(vgetq_lane_u64(hits, 1) != 0) << 1);

                // `row` is even, so both bits land in the same word.
                if let Some(word) = result.get_mut(row / 64) {
                    *word |= bits << (row % 64);
                }
            }

            // Scalar tail for an odd final row.
            let tail = pairs.remainder();
            let tail_start = data.len() - tail.len();
            for (i, &value) in tail.iter().enumerate() {
                if value > threshold {
                    set_bit(result, tail_start + i);
                }
            }
        }
    }

    /// NEON filter: i64 < threshold
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_i64_lt_neon(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
        unsafe {
            let bound = vdupq_n_s64(threshold);
            let mut pairs = data.chunks_exact(2);

            for (pair_idx, pair) in pairs.by_ref().enumerate() {
                let row = pair_idx * 2;
                let hits = vcltq_s64(vld1q_s64(pair.as_ptr()), bound);

                let bits = u64::from(vgetq_lane_u64(hits, 0) != 0)
                    | (u64::from(vgetq_lane_u64(hits, 1) != 0) << 1);

                if let Some(word) = result.get_mut(row / 64) {
                    *word |= bits << (row % 64);
                }
            }

            let tail = pairs.remainder();
            let tail_start = data.len() - tail.len();
            for (i, &value) in tail.iter().enumerate() {
                if value < threshold {
                    set_bit(result, tail_start + i);
                }
            }
        }
    }

    /// NEON filter: i64 == value
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_i64_eq_neon(data: &[i64], target: i64, result: &mut FilterBitmap) {
        unsafe {
            let wanted = vdupq_n_s64(target);
            let mut pairs = data.chunks_exact(2);

            for (pair_idx, pair) in pairs.by_ref().enumerate() {
                let row = pair_idx * 2;
                let hits = vceqq_s64(vld1q_s64(pair.as_ptr()), wanted);

                let bits = u64::from(vgetq_lane_u64(hits, 0) != 0)
                    | (u64::from(vgetq_lane_u64(hits, 1) != 0) << 1);

                if let Some(word) = result.get_mut(row / 64) {
                    *word |= bits << (row % 64);
                }
            }

            let tail = pairs.remainder();
            let tail_start = data.len() - tail.len();
            for (i, &value) in tail.iter().enumerate() {
                if value == target {
                    set_bit(result, tail_start + i);
                }
            }
        }
    }

    /// NEON filter: f64 > threshold
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_f64_gt_neon(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
        unsafe {
            let bound = vdupq_n_f64(threshold);
            let mut pairs = data.chunks_exact(2);

            for (pair_idx, pair) in pairs.by_ref().enumerate() {
                let row = pair_idx * 2;
                let hits = vcgtq_f64(vld1q_f64(pair.as_ptr()), bound);

                let bits = u64::from(vgetq_lane_u64(hits, 0) != 0)
                    | (u64::from(vgetq_lane_u64(hits, 1) != 0) << 1);

                if let Some(word) = result.get_mut(row / 64) {
                    *word |= bits << (row % 64);
                }
            }

            let tail = pairs.remainder();
            let tail_start = data.len() - tail.len();
            for (i, &value) in tail.iter().enumerate() {
                if value > threshold {
                    set_bit(result, tail_start + i);
                }
            }
        }
    }
}
533
534// =============================================================================
535// Public API with Automatic Dispatch
536// =============================================================================
537
/// Filter i64 column: value > threshold
///
/// Picks the widest kernel available: the AVX2 kernel on x86_64 when runtime
/// detection reports AVX2, the NEON kernel on aarch64, and the scalar loop
/// otherwise. Matching rows are OR-ed into `result`; existing bits are left
/// as they are.
pub fn filter_i64_gt(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
    #[cfg(target_arch = "x86_64")]
    {
        if avx2::is_available() {
            // SAFETY: the AVX2 kernel is only reached after runtime detection
            // confirmed the CPU supports AVX2.
            unsafe {
                avx2::filter_i64_gt_avx2(data, threshold, result);
            }
            return;
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        // No runtime check here: this file treats NEON as always present on
        // aarch64 (see `neon::is_available`).
        unsafe {
            neon::filter_i64_gt_neon(data, threshold, result);
        }
        return;
    }

    // On aarch64 the block above always returns, so the scalar fallback is
    // unreachable there; the attribute silences that warning.
    #[allow(unreachable_code)]
    filter_i64_gt_scalar(data, threshold, result);
}
561
/// Filter i64 column: value < threshold
///
/// Picks the widest kernel available: the AVX2 kernel on x86_64 when runtime
/// detection reports AVX2, the NEON kernel on aarch64, and the scalar loop
/// otherwise. Matching rows are OR-ed into `result`; existing bits are left
/// as they are.
pub fn filter_i64_lt(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
    #[cfg(target_arch = "x86_64")]
    {
        if avx2::is_available() {
            // SAFETY: the AVX2 kernel is only reached after runtime detection
            // confirmed the CPU supports AVX2.
            unsafe {
                avx2::filter_i64_lt_avx2(data, threshold, result);
            }
            return;
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        // No runtime check here: this file treats NEON as always present on
        // aarch64 (see `neon::is_available`).
        unsafe {
            neon::filter_i64_lt_neon(data, threshold, result);
        }
        return;
    }

    // On aarch64 the block above always returns, so the scalar fallback is
    // unreachable there; the attribute silences that warning.
    #[allow(unreachable_code)]
    filter_i64_lt_scalar(data, threshold, result);
}
585
/// Filter i64 column: value == target
///
/// Picks the widest kernel available: the AVX2 kernel on x86_64 when runtime
/// detection reports AVX2, the NEON kernel on aarch64, and the scalar loop
/// otherwise. Matching rows are OR-ed into `result`; existing bits are left
/// as they are.
pub fn filter_i64_eq(data: &[i64], target: i64, result: &mut FilterBitmap) {
    #[cfg(target_arch = "x86_64")]
    {
        if avx2::is_available() {
            // SAFETY: the AVX2 kernel is only reached after runtime detection
            // confirmed the CPU supports AVX2.
            unsafe {
                avx2::filter_i64_eq_avx2(data, target, result);
            }
            return;
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        // No runtime check here: this file treats NEON as always present on
        // aarch64 (see `neon::is_available`).
        unsafe {
            neon::filter_i64_eq_neon(data, target, result);
        }
        return;
    }

    // On aarch64 the block above always returns, so the scalar fallback is
    // unreachable there; the attribute silences that warning.
    #[allow(unreachable_code)]
    filter_i64_eq_scalar(data, target, result);
}
609
/// Filter i64 column: low <= value <= high
///
/// Uses the AVX2 kernel on x86_64 when runtime detection reports AVX2 and the
/// scalar loop everywhere else. Unlike the other dispatchers in this file
/// there is no NEON path, so aarch64 always takes the scalar loop. Matching
/// rows are OR-ed into `result`; existing bits are left as they are.
pub fn filter_i64_between(data: &[i64], low: i64, high: i64, result: &mut FilterBitmap) {
    #[cfg(target_arch = "x86_64")]
    {
        if avx2::is_available() {
            // SAFETY: the AVX2 kernel is only reached after runtime detection
            // confirmed the CPU supports AVX2.
            unsafe {
                avx2::filter_i64_between_avx2(data, low, high, result);
            }
            return;
        }
    }

    // Fallback to scalar
    filter_i64_between_scalar(data, low, high, result);
}
625
/// Filter f64 column: value > threshold
///
/// Picks the widest kernel available: the AVX2 kernel on x86_64 when runtime
/// detection reports AVX2, the NEON kernel on aarch64, and the scalar loop
/// otherwise. Matching rows are OR-ed into `result`; existing bits are left
/// as they are.
pub fn filter_f64_gt(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
    #[cfg(target_arch = "x86_64")]
    {
        if avx2::is_available() {
            // SAFETY: the AVX2 kernel is only reached after runtime detection
            // confirmed the CPU supports AVX2.
            unsafe {
                avx2::filter_f64_gt_avx2(data, threshold, result);
            }
            return;
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        // No runtime check here: this file treats NEON as always present on
        // aarch64 (see `neon::is_available`).
        unsafe {
            neon::filter_f64_gt_neon(data, threshold, result);
        }
        return;
    }

    // On aarch64 the block above always returns, so the scalar fallback is
    // unreachable there; the attribute silences that warning.
    #[allow(unreachable_code)]
    filter_f64_gt_scalar(data, threshold, result);
}
649
650/// Get information about SIMD support
651pub fn simd_info() -> SimdInfo {
652    SimdInfo {
653        #[cfg(target_arch = "x86_64")]
654        has_avx2: is_x86_feature_detected!("avx2"),
655        #[cfg(target_arch = "x86_64")]
656        has_avx512f: is_x86_feature_detected!("avx512f"),
657        #[cfg(not(target_arch = "x86_64"))]
658        has_avx2: false,
659        #[cfg(not(target_arch = "x86_64"))]
660        has_avx512f: false,
661        #[cfg(target_arch = "aarch64")]
662        has_neon: true,
663        #[cfg(not(target_arch = "aarch64"))]
664        has_neon: false,
665    }
666}
667
/// Snapshot of the SIMD instruction sets detected for this process.
#[derive(Debug, Clone)]
pub struct SimdInfo {
    /// AVX2 (256-bit vectors) is available.
    pub has_avx2: bool,
    /// AVX-512 Foundation is available.
    pub has_avx512f: bool,
    /// NEON (128-bit vectors) is available.
    pub has_neon: bool,
}

impl SimdInfo {
    /// Nominal speedup over scalar code for i64 filters.
    ///
    /// Returns the number of 64-bit lanes of the widest reported instruction
    /// set: 8 for AVX-512F, 4 for AVX2, 2 for NEON, and 1 when none is set.
    ///
    /// NOTE(review): the dispatchers visible in this file only call AVX2 and
    /// NEON kernels, so the 8.0 figure presumably refers to an AVX-512 kernel
    /// defined elsewhere or still planned — confirm.
    pub fn expected_speedup_i64(&self) -> f64 {
        match (self.has_avx512f, self.has_avx2, self.has_neon) {
            (true, _, _) => 8.0,
            (false, true, _) => 4.0,
            (false, false, true) => 2.0,
            (false, false, false) => 1.0,
        }
    }
}
690
#[cfg(test)]
mod tests {
    use super::*;

    /// Build the reference bitmap for `rows` rows from a per-row predicate.
    fn expected_bitmap(rows: usize, keep: impl Fn(usize) -> bool) -> FilterBitmap {
        let mut bitmap = allocate_bitmap(rows);
        for row in 0..rows {
            if keep(row) {
                set_bit(&mut bitmap, row);
            }
        }
        bitmap
    }

    #[test]
    fn test_filter_i64_gt() {
        let data: Vec<i64> = (0..100).collect();
        let mut result = allocate_bitmap(data.len());

        filter_i64_gt(&data, 50, &mut result);

        // Values 51-99 should pass (49 values)
        assert_eq!(popcount(&result), 49);

        for i in 0..100 {
            assert_eq!(get_bit(&result, i), i > 50, "Failed at index {}", i);
        }
    }

    #[test]
    fn test_filter_i64_lt() {
        let data: Vec<i64> = (0..100).collect();
        let mut result = allocate_bitmap(data.len());

        filter_i64_lt(&data, 50, &mut result);

        // Values 0-49 should pass (50 values)
        assert_eq!(popcount(&result), 50);

        for i in 0..100 {
            assert_eq!(get_bit(&result, i), i < 50, "Failed at index {}", i);
        }
    }

    #[test]
    fn test_filter_i64_eq() {
        let data: Vec<i64> = (0..100).collect();
        let mut result = allocate_bitmap(data.len());

        filter_i64_eq(&data, 42, &mut result);

        assert_eq!(popcount(&result), 1);
        assert!(get_bit(&result, 42));
    }

    #[test]
    fn test_filter_i64_between() {
        let data: Vec<i64> = (0..100).collect();
        let mut result = allocate_bitmap(data.len());

        filter_i64_between(&data, 25, 75, &mut result);

        // Values 25-75 inclusive (51 values)
        assert_eq!(popcount(&result), 51);

        for i in 0..100 {
            assert_eq!(
                get_bit(&result, i),
                i >= 25 && i <= 75,
                "Failed at index {}",
                i
            );
        }
    }

    #[test]
    fn test_filter_f64_gt() {
        let data: Vec<f64> = (0..100).map(|x| x as f64).collect();
        let mut result = allocate_bitmap(data.len());

        filter_f64_gt(&data, 50.0, &mut result);

        assert_eq!(popcount(&result), 49);

        // The count alone would not catch bits landing on the wrong rows.
        for i in 0..100 {
            assert_eq!(get_bit(&result, i), i > 50, "Failed at index {}", i);
        }
    }

    #[test]
    fn test_bitmap_operations() {
        let mut a = allocate_bitmap(64);
        let mut b = allocate_bitmap(64);

        for i in 0..32 {
            set_bit(&mut a, i);
        }
        for i in 16..48 {
            set_bit(&mut b, i);
        }

        let and_result = bitmap_and(&a, &b);
        assert_eq!(popcount(&and_result), 16); // 16-31

        let or_result = bitmap_or(&a, &b);
        assert_eq!(popcount(&or_result), 48); // 0-47

        // 64 rows fill exactly one word, so NOT has no padding bits to skew the count.
        let not_result = bitmap_not(&a);
        assert_eq!(popcount(&not_result), 32); // 32-63
        assert_eq!(not_result, expected_bitmap(64, |i| i >= 32));
    }

    #[test]
    fn test_simd_info() {
        let info = simd_info();
        println!("SIMD capabilities: {:?}", info);
        println!("Expected speedup: {}x", info.expected_speedup_i64());

        assert!(info.expected_speedup_i64() >= 1.0);
        assert_eq!(info.has_neon, cfg!(target_arch = "aarch64"));
    }

    /// The dispatched kernels must agree with the scalar reference for
    /// lengths that exercise the vector body, the scalar tail, and the
    /// boundary between bitmap words.
    #[test]
    fn test_dispatch_matches_scalar_on_unaligned_lengths() {
        for len in [0usize, 1, 3, 4, 5, 63, 64, 65, 103] {
            let ints: Vec<i64> = (0..len as i64).map(|v| v * 3 - 20).collect();
            let floats: Vec<f64> = ints.iter().map(|&v| v as f64 / 2.0).collect();

            let mut fast = allocate_bitmap(len);
            let mut slow = allocate_bitmap(len);
            filter_i64_gt(&ints, 7, &mut fast);
            filter_i64_gt_scalar(&ints, 7, &mut slow);
            assert_eq!(fast, slow, "gt mismatch at len {}", len);

            let mut fast = allocate_bitmap(len);
            let mut slow = allocate_bitmap(len);
            filter_i64_lt(&ints, 7, &mut fast);
            filter_i64_lt_scalar(&ints, 7, &mut slow);
            assert_eq!(fast, slow, "lt mismatch at len {}", len);

            let mut fast = allocate_bitmap(len);
            let mut slow = allocate_bitmap(len);
            filter_i64_eq(&ints, 7, &mut fast);
            filter_i64_eq_scalar(&ints, 7, &mut slow);
            assert_eq!(fast, slow, "eq mismatch at len {}", len);

            let mut fast = allocate_bitmap(len);
            let mut slow = allocate_bitmap(len);
            filter_i64_between(&ints, 5, 40, &mut fast);
            filter_i64_between_scalar(&ints, 5, 40, &mut slow);
            assert_eq!(fast, slow, "between mismatch at len {}", len);

            let mut fast = allocate_bitmap(len);
            let mut slow = allocate_bitmap(len);
            filter_f64_gt(&floats, 3.5, &mut fast);
            filter_f64_gt_scalar(&floats, 3.5, &mut slow);
            assert_eq!(fast, slow, "f64 gt mismatch at len {}", len);
        }
    }

    #[test]
    fn test_large_dataset() {
        // Test with 1M rows
        let data: Vec<i64> = (0..1_000_000).collect();
        let mut result = allocate_bitmap(data.len());

        let start = std::time::Instant::now();
        filter_i64_gt(&data, 500_000, &mut result);
        let elapsed = start.elapsed();

        assert_eq!(popcount(&result), 499_999);
        println!("Filtered 1M rows in {:?}", elapsed);
    }
}
805}