/// Dense filter-result bitmap: bit `i` corresponds to row `i`, packed
/// 64 rows per `u64` word (least-significant bit first within a word).
pub type FilterBitmap = Vec<u64>;
45
/// Comparison operator for a filter predicate.
///
/// NOTE(review): not consumed anywhere in this file — presumably matched
/// on by a query-planner/caller elsewhere; only a subset of these ops has
/// dedicated kernels below. Confirm against callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FilterOp {
    Equal,
    NotEqual,
    LessThan,
    LessEqual,
    GreaterThan,
    GreaterEqual,
    IsNull,
    IsNotNull,
}
58
59#[inline]
61pub fn allocate_bitmap(num_rows: usize) -> FilterBitmap {
62 vec![0u64; (num_rows + 63) / 64]
63}
64
65#[inline]
67pub fn set_bit(bitmap: &mut FilterBitmap, idx: usize) {
68 let word_idx = idx / 64;
69 let bit_idx = idx % 64;
70 if word_idx < bitmap.len() {
71 bitmap[word_idx] |= 1u64 << bit_idx;
72 }
73}
74
75#[inline]
77pub fn get_bit(bitmap: &FilterBitmap, idx: usize) -> bool {
78 let word_idx = idx / 64;
79 let bit_idx = idx % 64;
80 if word_idx < bitmap.len() {
81 (bitmap[word_idx] >> bit_idx) & 1 == 1
82 } else {
83 false
84 }
85}
86
87pub fn popcount(bitmap: &FilterBitmap) -> usize {
89 bitmap.iter().map(|w| w.count_ones() as usize).sum()
90}
91
92pub fn bitmap_and(a: &FilterBitmap, b: &FilterBitmap) -> FilterBitmap {
94 a.iter().zip(b.iter()).map(|(x, y)| x & y).collect()
95}
96
97pub fn bitmap_or(a: &FilterBitmap, b: &FilterBitmap) -> FilterBitmap {
99 a.iter().zip(b.iter()).map(|(x, y)| x | y).collect()
100}
101
102pub fn bitmap_not(a: &FilterBitmap) -> FilterBitmap {
104 a.iter().map(|x| !x).collect()
105}
106
107pub fn filter_i64_gt_scalar(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
113 for (idx, &value) in data.iter().enumerate() {
114 if value > threshold {
115 set_bit(result, idx);
116 }
117 }
118}
119
120pub fn filter_i64_ge_scalar(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
122 for (idx, &value) in data.iter().enumerate() {
123 if value >= threshold {
124 set_bit(result, idx);
125 }
126 }
127}
128
129pub fn filter_i64_lt_scalar(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
131 for (idx, &value) in data.iter().enumerate() {
132 if value < threshold {
133 set_bit(result, idx);
134 }
135 }
136}
137
138pub fn filter_i64_eq_scalar(data: &[i64], target: i64, result: &mut FilterBitmap) {
140 for (idx, &value) in data.iter().enumerate() {
141 if value == target {
142 set_bit(result, idx);
143 }
144 }
145}
146
147pub fn filter_i64_between_scalar(data: &[i64], low: i64, high: i64, result: &mut FilterBitmap) {
149 for (idx, &value) in data.iter().enumerate() {
150 if value >= low && value <= high {
151 set_bit(result, idx);
152 }
153 }
154}
155
156pub fn filter_f64_gt_scalar(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
158 for (idx, &value) in data.iter().enumerate() {
159 if value > threshold {
160 set_bit(result, idx);
161 }
162 }
163}
164
165pub fn filter_f64_lt_scalar(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
167 for (idx, &value) in data.iter().enumerate() {
168 if value < threshold {
169 set_bit(result, idx);
170 }
171 }
172}
173
174#[cfg(target_arch = "x86_64")]
179mod avx2 {
180 use super::*;
181 use std::arch::x86_64::*;
182
183 #[inline]
185 pub fn is_available() -> bool {
186 is_x86_feature_detected!("avx2")
187 }
188
189 #[target_feature(enable = "avx2")]
193 pub unsafe fn filter_i64_gt_avx2(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
194 let threshold_vec = _mm256_set1_epi64x(threshold);
195 let len = data.len();
196 let chunks = len / 4;
197
198 for chunk_idx in 0..chunks {
199 let offset = chunk_idx * 4;
200 let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
201
202 let cmp = _mm256_cmpgt_epi64(data_vec, threshold_vec);
204
205 let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
207
208 let word_idx = offset / 64;
210 let bit_offset = offset % 64;
211 if word_idx < result.len() {
212 result[word_idx] |= mask << bit_offset;
213 if bit_offset > 60 && word_idx + 1 < result.len() {
215 result[word_idx + 1] |= mask >> (64 - bit_offset);
216 }
217 }
218 }
219
220 let remainder_start = chunks * 4;
222 for idx in remainder_start..len {
223 if data[idx] > threshold {
224 set_bit(result, idx);
225 }
226 }
227 }
228
229 #[target_feature(enable = "avx2")]
231 pub unsafe fn filter_i64_lt_avx2(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
232 let threshold_vec = _mm256_set1_epi64x(threshold);
233 let len = data.len();
234 let chunks = len / 4;
235
236 for chunk_idx in 0..chunks {
237 let offset = chunk_idx * 4;
238 let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
239
240 let cmp = _mm256_cmpgt_epi64(threshold_vec, data_vec);
242 let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
243
244 let word_idx = offset / 64;
245 let bit_offset = offset % 64;
246 if word_idx < result.len() {
247 result[word_idx] |= mask << bit_offset;
248 if bit_offset > 60 && word_idx + 1 < result.len() {
249 result[word_idx + 1] |= mask >> (64 - bit_offset);
250 }
251 }
252 }
253
254 let remainder_start = chunks * 4;
255 for idx in remainder_start..len {
256 if data[idx] < threshold {
257 set_bit(result, idx);
258 }
259 }
260 }
261
262 #[target_feature(enable = "avx2")]
264 pub unsafe fn filter_i64_eq_avx2(data: &[i64], target: i64, result: &mut FilterBitmap) {
265 let target_vec = _mm256_set1_epi64x(target);
266 let len = data.len();
267 let chunks = len / 4;
268
269 for chunk_idx in 0..chunks {
270 let offset = chunk_idx * 4;
271 let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
272
273 let cmp = _mm256_cmpeq_epi64(data_vec, target_vec);
274 let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
275
276 let word_idx = offset / 64;
277 let bit_offset = offset % 64;
278 if word_idx < result.len() {
279 result[word_idx] |= mask << bit_offset;
280 if bit_offset > 60 && word_idx + 1 < result.len() {
281 result[word_idx + 1] |= mask >> (64 - bit_offset);
282 }
283 }
284 }
285
286 let remainder_start = chunks * 4;
287 for idx in remainder_start..len {
288 if data[idx] == target {
289 set_bit(result, idx);
290 }
291 }
292 }
293
294 #[target_feature(enable = "avx2")]
296 pub unsafe fn filter_i64_between_avx2(
297 data: &[i64],
298 low: i64,
299 high: i64,
300 result: &mut FilterBitmap,
301 ) {
302 let low_vec = _mm256_set1_epi64x(low - 1); let high_vec = _mm256_set1_epi64x(high);
304 let len = data.len();
305 let chunks = len / 4;
306
307 for chunk_idx in 0..chunks {
308 let offset = chunk_idx * 4;
309 let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
310
311 let cmp_low = _mm256_cmpgt_epi64(data_vec, low_vec);
313 let cmp_high = _mm256_cmpgt_epi64(high_vec, data_vec);
314 let cmp_high_eq = _mm256_cmpeq_epi64(data_vec, high_vec);
315 let cmp_high_final = _mm256_or_si256(cmp_high, cmp_high_eq);
316 let cmp = _mm256_and_si256(cmp_low, cmp_high_final);
317
318 let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
319
320 let word_idx = offset / 64;
321 let bit_offset = offset % 64;
322 if word_idx < result.len() {
323 result[word_idx] |= mask << bit_offset;
324 if bit_offset > 60 && word_idx + 1 < result.len() {
325 result[word_idx + 1] |= mask >> (64 - bit_offset);
326 }
327 }
328 }
329
330 let remainder_start = chunks * 4;
331 for idx in remainder_start..len {
332 let v = data[idx];
333 if v >= low && v <= high {
334 set_bit(result, idx);
335 }
336 }
337 }
338
339 #[target_feature(enable = "avx2")]
341 pub unsafe fn filter_f64_gt_avx2(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
342 let threshold_vec = _mm256_set1_pd(threshold);
343 let len = data.len();
344 let chunks = len / 4;
345
346 for chunk_idx in 0..chunks {
347 let offset = chunk_idx * 4;
348 let data_vec = _mm256_loadu_pd(data.as_ptr().add(offset));
349
350 let cmp = _mm256_cmp_pd(data_vec, threshold_vec, _CMP_GT_OQ);
351 let mask = _mm256_movemask_pd(cmp) as u64;
352
353 let word_idx = offset / 64;
354 let bit_offset = offset % 64;
355 if word_idx < result.len() {
356 result[word_idx] |= mask << bit_offset;
357 if bit_offset > 60 && word_idx + 1 < result.len() {
358 result[word_idx + 1] |= mask >> (64 - bit_offset);
359 }
360 }
361 }
362
363 let remainder_start = chunks * 4;
364 for idx in remainder_start..len {
365 if data[idx] > threshold {
366 set_bit(result, idx);
367 }
368 }
369 }
370}
371
#[cfg(target_arch = "aarch64")]
/// NEON filter kernels: 2 × 64-bit lanes per iteration, scalar tail loop
/// for an odd final element.
mod neon {
    use super::*;
    use std::arch::aarch64::*;

    /// NEON is a baseline feature of AArch64, so detection is trivially true.
    /// Kept for symmetry with `avx2::is_available`.
    #[inline]
    #[allow(dead_code)]
    pub fn is_available() -> bool {
        true
    }

    /// Sets bit `i` of `result` where `data[i] > threshold`.
    ///
    /// # Safety
    /// Caller must run on a NEON-capable CPU (always true on aarch64).
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_i64_gt_neon(data: &[i64], threshold: i64, result: &mut FilterBitmap) { unsafe {
        let threshold_vec = vdupq_n_s64(threshold);
        let len = data.len();
        let chunks = len / 2;

        for chunk_idx in 0..chunks {
            let offset = chunk_idx * 2;
            let data_vec = vld1q_s64(data.as_ptr().add(offset));

            // Each u64 lane of the comparison is all-ones or all-zeros.
            let cmp = vcgtq_s64(data_vec, threshold_vec);

            // Collapse the two lane results into bits 0 and 1 of a mask.
            let mask_low = vgetq_lane_u64(cmp, 0);
            let mask_high = vgetq_lane_u64(cmp, 1);
            let mask = ((mask_low != 0) as u64) | (((mask_high != 0) as u64) << 1);

            // offset is a multiple of 2, so the 2-bit mask never crosses
            // a word boundary (bit_offset <= 62).
            let word_idx = offset / 64;
            let bit_offset = offset % 64;
            if word_idx < result.len() {
                result[word_idx] |= mask << bit_offset;
            }
        }

        // Scalar tail for an odd trailing element.
        let remainder_start = chunks * 2;
        for idx in remainder_start..len {
            if data[idx] > threshold {
                set_bit(result, idx);
            }
        }
    }}

    /// Sets bit `i` of `result` where `data[i] < threshold`.
    ///
    /// # Safety
    /// Caller must run on a NEON-capable CPU (always true on aarch64).
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_i64_lt_neon(data: &[i64], threshold: i64, result: &mut FilterBitmap) { unsafe {
        let threshold_vec = vdupq_n_s64(threshold);
        let len = data.len();
        let chunks = len / 2;

        for chunk_idx in 0..chunks {
            let offset = chunk_idx * 2;
            let data_vec = vld1q_s64(data.as_ptr().add(offset));

            let cmp = vcltq_s64(data_vec, threshold_vec);

            // Two lanes -> bits 0 and 1 of the chunk mask.
            let mask_low = vgetq_lane_u64(cmp, 0);
            let mask_high = vgetq_lane_u64(cmp, 1);
            let mask = ((mask_low != 0) as u64) | (((mask_high != 0) as u64) << 1);

            let word_idx = offset / 64;
            let bit_offset = offset % 64;
            if word_idx < result.len() {
                result[word_idx] |= mask << bit_offset;
            }
        }

        let remainder_start = chunks * 2;
        for idx in remainder_start..len {
            if data[idx] < threshold {
                set_bit(result, idx);
            }
        }
    }}

    /// Sets bit `i` of `result` where `data[i] == target`.
    ///
    /// # Safety
    /// Caller must run on a NEON-capable CPU (always true on aarch64).
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_i64_eq_neon(data: &[i64], target: i64, result: &mut FilterBitmap) { unsafe {
        let target_vec = vdupq_n_s64(target);
        let len = data.len();
        let chunks = len / 2;

        for chunk_idx in 0..chunks {
            let offset = chunk_idx * 2;
            let data_vec = vld1q_s64(data.as_ptr().add(offset));

            let cmp = vceqq_s64(data_vec, target_vec);

            // Two lanes -> bits 0 and 1 of the chunk mask.
            let mask_low = vgetq_lane_u64(cmp, 0);
            let mask_high = vgetq_lane_u64(cmp, 1);
            let mask = ((mask_low != 0) as u64) | (((mask_high != 0) as u64) << 1);

            let word_idx = offset / 64;
            let bit_offset = offset % 64;
            if word_idx < result.len() {
                result[word_idx] |= mask << bit_offset;
            }
        }

        let remainder_start = chunks * 2;
        for idx in remainder_start..len {
            if data[idx] == target {
                set_bit(result, idx);
            }
        }
    }}

    /// Sets bit `i` of `result` where `data[i] > threshold`.
    /// NaN lanes never compare greater, so their bits stay clear.
    ///
    /// # Safety
    /// Caller must run on a NEON-capable CPU (always true on aarch64).
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_f64_gt_neon(data: &[f64], threshold: f64, result: &mut FilterBitmap) { unsafe {
        let threshold_vec = vdupq_n_f64(threshold);
        let len = data.len();
        let chunks = len / 2;

        for chunk_idx in 0..chunks {
            let offset = chunk_idx * 2;
            let data_vec = vld1q_f64(data.as_ptr().add(offset));

            let cmp = vcgtq_f64(data_vec, threshold_vec);

            // Two lanes -> bits 0 and 1 of the chunk mask.
            let mask_low = vgetq_lane_u64(cmp, 0);
            let mask_high = vgetq_lane_u64(cmp, 1);
            let mask = ((mask_low != 0) as u64) | (((mask_high != 0) as u64) << 1);

            let word_idx = offset / 64;
            let bit_offset = offset % 64;
            if word_idx < result.len() {
                result[word_idx] |= mask << bit_offset;
            }
        }

        let remainder_start = chunks * 2;
        for idx in remainder_start..len {
            if data[idx] > threshold {
                set_bit(result, idx);
            }
        }
    }}
}
523
524pub fn filter_i64_gt(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
530 #[cfg(target_arch = "x86_64")]
531 {
532 if avx2::is_available() {
533 unsafe {
534 avx2::filter_i64_gt_avx2(data, threshold, result);
535 }
536 return;
537 }
538 }
539
540 #[cfg(target_arch = "aarch64")]
541 {
542 unsafe {
543 neon::filter_i64_gt_neon(data, threshold, result);
544 }
545 return;
546 }
547
548 #[allow(unreachable_code)]
549 filter_i64_gt_scalar(data, threshold, result);
550}
551
552pub fn filter_i64_lt(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
554 #[cfg(target_arch = "x86_64")]
555 {
556 if avx2::is_available() {
557 unsafe {
558 avx2::filter_i64_lt_avx2(data, threshold, result);
559 }
560 return;
561 }
562 }
563
564 #[cfg(target_arch = "aarch64")]
565 {
566 unsafe {
567 neon::filter_i64_lt_neon(data, threshold, result);
568 }
569 return;
570 }
571
572 #[allow(unreachable_code)]
573 filter_i64_lt_scalar(data, threshold, result);
574}
575
576pub fn filter_i64_eq(data: &[i64], target: i64, result: &mut FilterBitmap) {
578 #[cfg(target_arch = "x86_64")]
579 {
580 if avx2::is_available() {
581 unsafe {
582 avx2::filter_i64_eq_avx2(data, target, result);
583 }
584 return;
585 }
586 }
587
588 #[cfg(target_arch = "aarch64")]
589 {
590 unsafe {
591 neon::filter_i64_eq_neon(data, target, result);
592 }
593 return;
594 }
595
596 #[allow(unreachable_code)]
597 filter_i64_eq_scalar(data, target, result);
598}
599
600pub fn filter_i64_between(data: &[i64], low: i64, high: i64, result: &mut FilterBitmap) {
602 #[cfg(target_arch = "x86_64")]
603 {
604 if avx2::is_available() {
605 unsafe {
606 avx2::filter_i64_between_avx2(data, low, high, result);
607 }
608 return;
609 }
610 }
611
612 filter_i64_between_scalar(data, low, high, result);
614}
615
616pub fn filter_f64_gt(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
618 #[cfg(target_arch = "x86_64")]
619 {
620 if avx2::is_available() {
621 unsafe {
622 avx2::filter_f64_gt_avx2(data, threshold, result);
623 }
624 return;
625 }
626 }
627
628 #[cfg(target_arch = "aarch64")]
629 {
630 unsafe {
631 neon::filter_f64_gt_neon(data, threshold, result);
632 }
633 return;
634 }
635
636 #[allow(unreachable_code)]
637 filter_f64_gt_scalar(data, threshold, result);
638}
639
640pub fn simd_info() -> SimdInfo {
642 SimdInfo {
643 #[cfg(target_arch = "x86_64")]
644 has_avx2: is_x86_feature_detected!("avx2"),
645 #[cfg(target_arch = "x86_64")]
646 has_avx512f: is_x86_feature_detected!("avx512f"),
647 #[cfg(not(target_arch = "x86_64"))]
648 has_avx2: false,
649 #[cfg(not(target_arch = "x86_64"))]
650 has_avx512f: false,
651 #[cfg(target_arch = "aarch64")]
652 has_neon: true,
653 #[cfg(not(target_arch = "aarch64"))]
654 has_neon: false,
655 }
656}
657
/// CPU SIMD capability flags, as reported by `simd_info()`.
#[derive(Debug, Clone)]
pub struct SimdInfo {
    /// AVX2 detected at runtime (x86_64 only; `false` elsewhere).
    pub has_avx2: bool,
    /// AVX-512 Foundation detected at runtime (x86_64 only; `false` elsewhere).
    pub has_avx512f: bool,
    /// NEON available (`true` on aarch64, `false` elsewhere).
    pub has_neon: bool,
}
665
666impl SimdInfo {
667 pub fn expected_speedup_i64(&self) -> f64 {
669 if self.has_avx512f {
670 8.0
671 } else if self.has_avx2 {
672 4.0
673 } else if self.has_neon {
674 2.0
675 } else {
676 1.0
677 }
678 }
679}
680
#[cfg(test)]
mod tests {
    use super::*;

    // data = 0..100, strictly greater than 50 -> 51..=99 match (49 bits).
    #[test]
    fn test_filter_i64_gt() {
        let data: Vec<i64> = (0..100).collect();
        let mut result = allocate_bitmap(data.len());

        filter_i64_gt(&data, 50, &mut result);

        assert_eq!(popcount(&result), 49);

        for i in 0..100 {
            assert_eq!(get_bit(&result, i), i > 50, "Failed at index {}", i);
        }
    }

    // data = 0..100, strictly less than 50 -> 0..=49 match (50 bits).
    #[test]
    fn test_filter_i64_lt() {
        let data: Vec<i64> = (0..100).collect();
        let mut result = allocate_bitmap(data.len());

        filter_i64_lt(&data, 50, &mut result);

        assert_eq!(popcount(&result), 50);

        for i in 0..100 {
            assert_eq!(get_bit(&result, i), i < 50, "Failed at index {}", i);
        }
    }

    // Distinct values 0..100: equality with 42 matches exactly one row.
    #[test]
    fn test_filter_i64_eq() {
        let data: Vec<i64> = (0..100).collect();
        let mut result = allocate_bitmap(data.len());

        filter_i64_eq(&data, 42, &mut result);

        assert_eq!(popcount(&result), 1);
        assert!(get_bit(&result, 42));
    }

    // Inclusive bounds: 25..=75 contains 51 values.
    #[test]
    fn test_filter_i64_between() {
        let data: Vec<i64> = (0..100).collect();
        let mut result = allocate_bitmap(data.len());

        filter_i64_between(&data, 25, 75, &mut result);

        assert_eq!(popcount(&result), 51);

        for i in 0..100 {
            assert_eq!(
                get_bit(&result, i),
                i >= 25 && i <= 75,
                "Failed at index {}",
                i
            );
        }
    }

    // Same shape as the i64 gt test, on the f64 path: 51.0..=99.0 (49 bits).
    #[test]
    fn test_filter_f64_gt() {
        let data: Vec<f64> = (0..100).map(|x| x as f64).collect();
        let mut result = allocate_bitmap(data.len());

        filter_f64_gt(&data, 50.0, &mut result);

        assert_eq!(popcount(&result), 49);
    }

    // a = bits 0..32, b = bits 16..48:
    // AND -> overlap 16..32 (16 bits); OR -> union 0..48 (48 bits).
    #[test]
    fn test_bitmap_operations() {
        let mut a = allocate_bitmap(64);
        let mut b = allocate_bitmap(64);

        for i in 0..32 {
            set_bit(&mut a, i);
        }
        for i in 16..48 {
            set_bit(&mut b, i);
        }

        let and_result = bitmap_and(&a, &b);
        assert_eq!(popcount(&and_result), 16);
        let or_result = bitmap_or(&a, &b);
        assert_eq!(popcount(&or_result), 48);
    }

    // Smoke test: just prints the detected capabilities; no assertions.
    #[test]
    fn test_simd_info() {
        let info = simd_info();
        println!("SIMD capabilities: {:?}", info);
        println!("Expected speedup: {}x", info.expected_speedup_i64());
    }

    // 1M rows exercises many full SIMD chunks plus word-boundary crossings;
    // values strictly greater than 500_000 are 500_001..=999_999 (499_999).
    #[test]
    fn test_large_dataset() {
        let data: Vec<i64> = (0..1_000_000).collect();
        let mut result = allocate_bitmap(data.len());

        let start = std::time::Instant::now();
        filter_i64_gt(&data, 500_000, &mut result);
        let elapsed = start.elapsed();

        assert_eq!(popcount(&result), 499_999);
        println!("Filtered 1M rows in {:?}", elapsed);
    }
}