/// Row-selection bitmap stored as a vector of 64-bit words.
///
/// Row `i` lives in bit `i % 64` of word `i / 64` (see `set_bit` / `get_bit`).
pub type FilterBitmap = Vec<u64>;
47
/// Comparison and null-test operators that a filter predicate can name.
///
/// NOTE(review): nothing in the visible part of this file consumes this enum,
/// so the exact meaning of each variant is presumed from its name — confirm
/// against the callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FilterOp {
    Equal,
    NotEqual,
    LessThan,
    LessEqual,
    GreaterThan,
    GreaterEqual,
    IsNull,
    IsNotNull,
}
60
61#[inline]
63pub fn allocate_bitmap(num_rows: usize) -> FilterBitmap {
64 vec![0u64; (num_rows + 63) / 64]
65}
66
67#[inline]
69pub fn set_bit(bitmap: &mut FilterBitmap, idx: usize) {
70 let word_idx = idx / 64;
71 let bit_idx = idx % 64;
72 if word_idx < bitmap.len() {
73 bitmap[word_idx] |= 1u64 << bit_idx;
74 }
75}
76
77#[inline]
79pub fn get_bit(bitmap: &FilterBitmap, idx: usize) -> bool {
80 let word_idx = idx / 64;
81 let bit_idx = idx % 64;
82 if word_idx < bitmap.len() {
83 (bitmap[word_idx] >> bit_idx) & 1 == 1
84 } else {
85 false
86 }
87}
88
89pub fn popcount(bitmap: &FilterBitmap) -> usize {
91 bitmap.iter().map(|w| w.count_ones() as usize).sum()
92}
93
94pub fn bitmap_and(a: &FilterBitmap, b: &FilterBitmap) -> FilterBitmap {
96 a.iter().zip(b.iter()).map(|(x, y)| x & y).collect()
97}
98
99pub fn bitmap_or(a: &FilterBitmap, b: &FilterBitmap) -> FilterBitmap {
101 a.iter().zip(b.iter()).map(|(x, y)| x | y).collect()
102}
103
104pub fn bitmap_not(a: &FilterBitmap) -> FilterBitmap {
106 a.iter().map(|x| !x).collect()
107}
108
109pub fn filter_i64_gt_scalar(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
115 for (idx, &value) in data.iter().enumerate() {
116 if value > threshold {
117 set_bit(result, idx);
118 }
119 }
120}
121
122pub fn filter_i64_ge_scalar(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
124 for (idx, &value) in data.iter().enumerate() {
125 if value >= threshold {
126 set_bit(result, idx);
127 }
128 }
129}
130
131pub fn filter_i64_lt_scalar(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
133 for (idx, &value) in data.iter().enumerate() {
134 if value < threshold {
135 set_bit(result, idx);
136 }
137 }
138}
139
140pub fn filter_i64_eq_scalar(data: &[i64], target: i64, result: &mut FilterBitmap) {
142 for (idx, &value) in data.iter().enumerate() {
143 if value == target {
144 set_bit(result, idx);
145 }
146 }
147}
148
149pub fn filter_i64_between_scalar(data: &[i64], low: i64, high: i64, result: &mut FilterBitmap) {
151 for (idx, &value) in data.iter().enumerate() {
152 if value >= low && value <= high {
153 set_bit(result, idx);
154 }
155 }
156}
157
158pub fn filter_f64_gt_scalar(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
160 for (idx, &value) in data.iter().enumerate() {
161 if value > threshold {
162 set_bit(result, idx);
163 }
164 }
165}
166
167pub fn filter_f64_lt_scalar(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
169 for (idx, &value) in data.iter().enumerate() {
170 if value < threshold {
171 set_bit(result, idx);
172 }
173 }
174}
175
176#[cfg(target_arch = "x86_64")]
181mod avx2 {
182 use super::*;
183 use std::arch::x86_64::*;
184
185 #[inline]
187 pub fn is_available() -> bool {
188 is_x86_feature_detected!("avx2")
189 }
190
191 #[target_feature(enable = "avx2")]
195 pub unsafe fn filter_i64_gt_avx2(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
196 let threshold_vec = _mm256_set1_epi64x(threshold);
197 let len = data.len();
198 let chunks = len / 4;
199
200 for chunk_idx in 0..chunks {
201 let offset = chunk_idx * 4;
202 let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
203
204 let cmp = _mm256_cmpgt_epi64(data_vec, threshold_vec);
206
207 let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
209
210 let word_idx = offset / 64;
212 let bit_offset = offset % 64;
213 if word_idx < result.len() {
214 result[word_idx] |= mask << bit_offset;
215 if bit_offset > 60 && word_idx + 1 < result.len() {
217 result[word_idx + 1] |= mask >> (64 - bit_offset);
218 }
219 }
220 }
221
222 let remainder_start = chunks * 4;
224 for idx in remainder_start..len {
225 if data[idx] > threshold {
226 set_bit(result, idx);
227 }
228 }
229 }
230
231 #[target_feature(enable = "avx2")]
233 pub unsafe fn filter_i64_lt_avx2(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
234 let threshold_vec = _mm256_set1_epi64x(threshold);
235 let len = data.len();
236 let chunks = len / 4;
237
238 for chunk_idx in 0..chunks {
239 let offset = chunk_idx * 4;
240 let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
241
242 let cmp = _mm256_cmpgt_epi64(threshold_vec, data_vec);
244 let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
245
246 let word_idx = offset / 64;
247 let bit_offset = offset % 64;
248 if word_idx < result.len() {
249 result[word_idx] |= mask << bit_offset;
250 if bit_offset > 60 && word_idx + 1 < result.len() {
251 result[word_idx + 1] |= mask >> (64 - bit_offset);
252 }
253 }
254 }
255
256 let remainder_start = chunks * 4;
257 for idx in remainder_start..len {
258 if data[idx] < threshold {
259 set_bit(result, idx);
260 }
261 }
262 }
263
264 #[target_feature(enable = "avx2")]
266 pub unsafe fn filter_i64_eq_avx2(data: &[i64], target: i64, result: &mut FilterBitmap) {
267 let target_vec = _mm256_set1_epi64x(target);
268 let len = data.len();
269 let chunks = len / 4;
270
271 for chunk_idx in 0..chunks {
272 let offset = chunk_idx * 4;
273 let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
274
275 let cmp = _mm256_cmpeq_epi64(data_vec, target_vec);
276 let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
277
278 let word_idx = offset / 64;
279 let bit_offset = offset % 64;
280 if word_idx < result.len() {
281 result[word_idx] |= mask << bit_offset;
282 if bit_offset > 60 && word_idx + 1 < result.len() {
283 result[word_idx + 1] |= mask >> (64 - bit_offset);
284 }
285 }
286 }
287
288 let remainder_start = chunks * 4;
289 for idx in remainder_start..len {
290 if data[idx] == target {
291 set_bit(result, idx);
292 }
293 }
294 }
295
296 #[target_feature(enable = "avx2")]
298 pub unsafe fn filter_i64_between_avx2(
299 data: &[i64],
300 low: i64,
301 high: i64,
302 result: &mut FilterBitmap,
303 ) {
304 let low_vec = _mm256_set1_epi64x(low - 1); let high_vec = _mm256_set1_epi64x(high);
306 let len = data.len();
307 let chunks = len / 4;
308
309 for chunk_idx in 0..chunks {
310 let offset = chunk_idx * 4;
311 let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
312
313 let cmp_low = _mm256_cmpgt_epi64(data_vec, low_vec);
315 let cmp_high = _mm256_cmpgt_epi64(high_vec, data_vec);
316 let cmp_high_eq = _mm256_cmpeq_epi64(data_vec, high_vec);
317 let cmp_high_final = _mm256_or_si256(cmp_high, cmp_high_eq);
318 let cmp = _mm256_and_si256(cmp_low, cmp_high_final);
319
320 let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
321
322 let word_idx = offset / 64;
323 let bit_offset = offset % 64;
324 if word_idx < result.len() {
325 result[word_idx] |= mask << bit_offset;
326 if bit_offset > 60 && word_idx + 1 < result.len() {
327 result[word_idx + 1] |= mask >> (64 - bit_offset);
328 }
329 }
330 }
331
332 let remainder_start = chunks * 4;
333 for idx in remainder_start..len {
334 let v = data[idx];
335 if v >= low && v <= high {
336 set_bit(result, idx);
337 }
338 }
339 }
340
341 #[target_feature(enable = "avx2")]
343 pub unsafe fn filter_f64_gt_avx2(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
344 let threshold_vec = _mm256_set1_pd(threshold);
345 let len = data.len();
346 let chunks = len / 4;
347
348 for chunk_idx in 0..chunks {
349 let offset = chunk_idx * 4;
350 let data_vec = _mm256_loadu_pd(data.as_ptr().add(offset));
351
352 let cmp = _mm256_cmp_pd(data_vec, threshold_vec, _CMP_GT_OQ);
353 let mask = _mm256_movemask_pd(cmp) as u64;
354
355 let word_idx = offset / 64;
356 let bit_offset = offset % 64;
357 if word_idx < result.len() {
358 result[word_idx] |= mask << bit_offset;
359 if bit_offset > 60 && word_idx + 1 < result.len() {
360 result[word_idx + 1] |= mask >> (64 - bit_offset);
361 }
362 }
363 }
364
365 let remainder_start = chunks * 4;
366 for idx in remainder_start..len {
367 if data[idx] > threshold {
368 set_bit(result, idx);
369 }
370 }
371 }
372}
373
/// NEON filter kernels: each processes two 64-bit values per iteration and
/// finishes an odd trailing row with scalar code.
///
/// Like the scalar kernels, these only ever set bits in `result`, and rows
/// that do not fit in `result` are skipped.
#[cfg(target_arch = "aarch64")]
mod neon {
    use super::*;
    use std::arch::aarch64::*;

    /// Rows handled per 128-bit vector (two 64-bit lanes).
    const LANES: usize = 2;

    /// Always reports `true`; no runtime detection is performed here.
    #[inline]
    #[allow(dead_code)] // the dispatchers in this file call the kernels without consulting it
    pub fn is_available() -> bool {
        true
    }

    /// ORs the two lane bits of `mask` into `result`, starting at bit `offset`.
    ///
    /// `offset` is always even, so `offset % 64` is at most 62 and the two
    /// bits never straddle two words. Offsets past the end of `result` are
    /// ignored.
    #[inline]
    fn merge_pair_mask(result: &mut FilterBitmap, offset: usize, mask: u64) {
        if let Some(word) = result.get_mut(offset / 64) {
            *word |= mask << (offset % 64);
        }
    }

    /// Sets the bit of every row with `data[row] > threshold`.
    ///
    /// # Safety
    ///
    /// The CPU executing this function must support NEON.
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_i64_gt_neon(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
        unsafe {
            let threshold_vec = vdupq_n_s64(threshold);
            let pairs = data.chunks_exact(LANES);
            let tail = pairs.remainder();
            let tail_start = data.len() - tail.len();

            for (pair_idx, pair) in pairs.enumerate() {
                // `pair` holds exactly two i64 values, so the 16-byte load is in bounds.
                let cmp = vcgtq_s64(vld1q_s64(pair.as_ptr()), threshold_vec);
                // A matching lane is all-ones; reduce each lane to a single bit.
                let mask = u64::from(vgetq_lane_u64(cmp, 0) != 0)
                    | (u64::from(vgetq_lane_u64(cmp, 1) != 0) << 1);
                merge_pair_mask(result, pair_idx * LANES, mask);
            }

            for (i, &value) in tail.iter().enumerate() {
                if value > threshold {
                    set_bit(result, tail_start + i);
                }
            }
        }
    }

    /// Sets the bit of every row with `data[row] < threshold`.
    ///
    /// # Safety
    ///
    /// The CPU executing this function must support NEON.
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_i64_lt_neon(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
        unsafe {
            let threshold_vec = vdupq_n_s64(threshold);
            let pairs = data.chunks_exact(LANES);
            let tail = pairs.remainder();
            let tail_start = data.len() - tail.len();

            for (pair_idx, pair) in pairs.enumerate() {
                // `pair` holds exactly two i64 values, so the 16-byte load is in bounds.
                let cmp = vcltq_s64(vld1q_s64(pair.as_ptr()), threshold_vec);
                let mask = u64::from(vgetq_lane_u64(cmp, 0) != 0)
                    | (u64::from(vgetq_lane_u64(cmp, 1) != 0) << 1);
                merge_pair_mask(result, pair_idx * LANES, mask);
            }

            for (i, &value) in tail.iter().enumerate() {
                if value < threshold {
                    set_bit(result, tail_start + i);
                }
            }
        }
    }

    /// Sets the bit of every row with `data[row] == target`.
    ///
    /// # Safety
    ///
    /// The CPU executing this function must support NEON.
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_i64_eq_neon(data: &[i64], target: i64, result: &mut FilterBitmap) {
        unsafe {
            let target_vec = vdupq_n_s64(target);
            let pairs = data.chunks_exact(LANES);
            let tail = pairs.remainder();
            let tail_start = data.len() - tail.len();

            for (pair_idx, pair) in pairs.enumerate() {
                // `pair` holds exactly two i64 values, so the 16-byte load is in bounds.
                let cmp = vceqq_s64(vld1q_s64(pair.as_ptr()), target_vec);
                let mask = u64::from(vgetq_lane_u64(cmp, 0) != 0)
                    | (u64::from(vgetq_lane_u64(cmp, 1) != 0) << 1);
                merge_pair_mask(result, pair_idx * LANES, mask);
            }

            for (i, &value) in tail.iter().enumerate() {
                if value == target {
                    set_bit(result, tail_start + i);
                }
            }
        }
    }

    /// Sets the bit of every row with `data[row] > threshold`.
    ///
    /// # Safety
    ///
    /// The CPU executing this function must support NEON.
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_f64_gt_neon(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
        unsafe {
            let threshold_vec = vdupq_n_f64(threshold);
            let pairs = data.chunks_exact(LANES);
            let tail = pairs.remainder();
            let tail_start = data.len() - tail.len();

            for (pair_idx, pair) in pairs.enumerate() {
                // `pair` holds exactly two f64 values, so the 16-byte load is in bounds.
                let cmp = vcgtq_f64(vld1q_f64(pair.as_ptr()), threshold_vec);
                let mask = u64::from(vgetq_lane_u64(cmp, 0) != 0)
                    | (u64::from(vgetq_lane_u64(cmp, 1) != 0) << 1);
                merge_pair_mask(result, pair_idx * LANES, mask);
            }

            for (i, &value) in tail.iter().enumerate() {
                if value > threshold {
                    set_bit(result, tail_start + i);
                }
            }
        }
    }
}
533
534pub fn filter_i64_gt(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
540 #[cfg(target_arch = "x86_64")]
541 {
542 if avx2::is_available() {
543 unsafe {
544 avx2::filter_i64_gt_avx2(data, threshold, result);
545 }
546 return;
547 }
548 }
549
550 #[cfg(target_arch = "aarch64")]
551 {
552 unsafe {
553 neon::filter_i64_gt_neon(data, threshold, result);
554 }
555 return;
556 }
557
558 #[allow(unreachable_code)]
559 filter_i64_gt_scalar(data, threshold, result);
560}
561
562pub fn filter_i64_lt(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
564 #[cfg(target_arch = "x86_64")]
565 {
566 if avx2::is_available() {
567 unsafe {
568 avx2::filter_i64_lt_avx2(data, threshold, result);
569 }
570 return;
571 }
572 }
573
574 #[cfg(target_arch = "aarch64")]
575 {
576 unsafe {
577 neon::filter_i64_lt_neon(data, threshold, result);
578 }
579 return;
580 }
581
582 #[allow(unreachable_code)]
583 filter_i64_lt_scalar(data, threshold, result);
584}
585
586pub fn filter_i64_eq(data: &[i64], target: i64, result: &mut FilterBitmap) {
588 #[cfg(target_arch = "x86_64")]
589 {
590 if avx2::is_available() {
591 unsafe {
592 avx2::filter_i64_eq_avx2(data, target, result);
593 }
594 return;
595 }
596 }
597
598 #[cfg(target_arch = "aarch64")]
599 {
600 unsafe {
601 neon::filter_i64_eq_neon(data, target, result);
602 }
603 return;
604 }
605
606 #[allow(unreachable_code)]
607 filter_i64_eq_scalar(data, target, result);
608}
609
610pub fn filter_i64_between(data: &[i64], low: i64, high: i64, result: &mut FilterBitmap) {
612 #[cfg(target_arch = "x86_64")]
613 {
614 if avx2::is_available() {
615 unsafe {
616 avx2::filter_i64_between_avx2(data, low, high, result);
617 }
618 return;
619 }
620 }
621
622 filter_i64_between_scalar(data, low, high, result);
624}
625
626pub fn filter_f64_gt(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
628 #[cfg(target_arch = "x86_64")]
629 {
630 if avx2::is_available() {
631 unsafe {
632 avx2::filter_f64_gt_avx2(data, threshold, result);
633 }
634 return;
635 }
636 }
637
638 #[cfg(target_arch = "aarch64")]
639 {
640 unsafe {
641 neon::filter_f64_gt_neon(data, threshold, result);
642 }
643 return;
644 }
645
646 #[allow(unreachable_code)]
647 filter_f64_gt_scalar(data, threshold, result);
648}
649
650pub fn simd_info() -> SimdInfo {
652 SimdInfo {
653 #[cfg(target_arch = "x86_64")]
654 has_avx2: is_x86_feature_detected!("avx2"),
655 #[cfg(target_arch = "x86_64")]
656 has_avx512f: is_x86_feature_detected!("avx512f"),
657 #[cfg(not(target_arch = "x86_64"))]
658 has_avx2: false,
659 #[cfg(not(target_arch = "x86_64"))]
660 has_avx512f: false,
661 #[cfg(target_arch = "aarch64")]
662 has_neon: true,
663 #[cfg(not(target_arch = "aarch64"))]
664 has_neon: false,
665 }
666}
667
/// Snapshot of the SIMD capabilities reported by `simd_info`.
#[derive(Debug, Clone)]
pub struct SimdInfo {
    /// AVX2 was detected at runtime (always `false` off x86_64).
    pub has_avx2: bool,
    /// AVX-512F was detected at runtime (always `false` off x86_64).
    pub has_avx512f: bool,
    /// The build targets aarch64; no runtime detection is involved.
    pub has_neon: bool,
}
675
676impl SimdInfo {
677 pub fn expected_speedup_i64(&self) -> f64 {
679 if self.has_avx512f {
680 8.0
681 } else if self.has_avx2 {
682 4.0
683 } else if self.has_neon {
684 2.0
685 } else {
686 1.0
687 }
688 }
689}
690
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the column `0, 1, ..., len - 1` shared by the integer tests.
    fn ascending(len: i64) -> Vec<i64> {
        (0..len).collect()
    }

    #[test]
    fn test_filter_i64_gt() {
        let data = ascending(100);
        let mut result = allocate_bitmap(data.len());

        filter_i64_gt(&data, 50, &mut result);

        // Rows 51..=99 satisfy the predicate.
        assert_eq!(popcount(&result), 49);
        for (i, &value) in data.iter().enumerate() {
            assert_eq!(get_bit(&result, i), value > 50, "Failed at index {}", i);
        }
    }

    #[test]
    fn test_filter_i64_lt() {
        let data = ascending(100);
        let mut result = allocate_bitmap(data.len());

        filter_i64_lt(&data, 50, &mut result);

        // Rows 0..=49 satisfy the predicate.
        assert_eq!(popcount(&result), 50);
        for (i, &value) in data.iter().enumerate() {
            assert_eq!(get_bit(&result, i), value < 50, "Failed at index {}", i);
        }
    }

    #[test]
    fn test_filter_i64_eq() {
        let data = ascending(100);
        let mut result = allocate_bitmap(data.len());

        filter_i64_eq(&data, 42, &mut result);

        assert_eq!(popcount(&result), 1);
        assert!(get_bit(&result, 42));
    }

    #[test]
    fn test_filter_i64_between() {
        let data = ascending(100);
        let mut result = allocate_bitmap(data.len());

        filter_i64_between(&data, 25, 75, &mut result);

        // Both bounds are inclusive: rows 25..=75.
        assert_eq!(popcount(&result), 51);
        for (i, value) in data.iter().enumerate() {
            assert_eq!(
                get_bit(&result, i),
                (25..=75).contains(value),
                "Failed at index {}",
                i
            );
        }
    }

    #[test]
    fn test_filter_f64_gt() {
        let data: Vec<f64> = (0..100).map(f64::from).collect();
        let mut result = allocate_bitmap(data.len());

        filter_f64_gt(&data, 50.0, &mut result);

        assert_eq!(popcount(&result), 49);
    }

    #[test]
    fn test_bitmap_operations() {
        let mut a = allocate_bitmap(64);
        let mut b = allocate_bitmap(64);
        (0..32).for_each(|i| set_bit(&mut a, i));
        (16..48).for_each(|i| set_bit(&mut b, i));

        // The operands overlap on rows 16..32 and together cover rows 0..48.
        let and_result = bitmap_and(&a, &b);
        let or_result = bitmap_or(&a, &b);
        assert_eq!(popcount(&and_result), 16);
        assert_eq!(popcount(&or_result), 48);
    }

    #[test]
    fn test_simd_info() {
        // Smoke test: only checks that detection runs; output is informational.
        let info = simd_info();
        println!("SIMD capabilities: {:?}", info);
        println!("Expected speedup: {}x", info.expected_speedup_i64());
    }

    #[test]
    fn test_large_dataset() {
        let data = ascending(1_000_000);
        let mut result = allocate_bitmap(data.len());

        let timer = std::time::Instant::now();
        filter_i64_gt(&data, 500_000, &mut result);
        let elapsed = timer.elapsed();

        // Rows 500_001..=999_999 satisfy the predicate.
        assert_eq!(popcount(&result), 499_999);
        println!("Filtered 1M rows in {:?}", elapsed);
    }
}