/// Dense row-selection bitmap: one bit per row, packed into `u64` words.
/// Bit `i` lives in word `i / 64` at bit position `i % 64` (LSB-first
/// within each word), matching `set_bit`/`get_bit` below.
pub type FilterBitmap = Vec<u64>;
48
/// Comparison operators a filter predicate can apply to a column.
///
/// NOTE(review): only some of these have dedicated kernels in this file
/// (`Equal`, `LessThan`, `GreaterThan`, `GreaterEqual`, plus a between
/// range); `LessEqual`, `IsNull` and `IsNotNull` are presumably handled
/// elsewhere (e.g. against a validity bitmap) — confirm against callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FilterOp {
    // value == target
    Equal,
    // value != target
    NotEqual,
    // value < target
    LessThan,
    // value <= target
    LessEqual,
    // value > target
    GreaterThan,
    // value >= target
    GreaterEqual,
    // value is NULL
    IsNull,
    // value is not NULL
    IsNotNull,
}
61
62#[inline]
64pub fn allocate_bitmap(num_rows: usize) -> FilterBitmap {
65 vec![0u64; (num_rows + 63) / 64]
66}
67
68#[inline]
70pub fn set_bit(bitmap: &mut FilterBitmap, idx: usize) {
71 let word_idx = idx / 64;
72 let bit_idx = idx % 64;
73 if word_idx < bitmap.len() {
74 bitmap[word_idx] |= 1u64 << bit_idx;
75 }
76}
77
78#[inline]
80pub fn get_bit(bitmap: &FilterBitmap, idx: usize) -> bool {
81 let word_idx = idx / 64;
82 let bit_idx = idx % 64;
83 if word_idx < bitmap.len() {
84 (bitmap[word_idx] >> bit_idx) & 1 == 1
85 } else {
86 false
87 }
88}
89
90pub fn popcount(bitmap: &FilterBitmap) -> usize {
92 bitmap.iter().map(|w| w.count_ones() as usize).sum()
93}
94
95pub fn bitmap_and(a: &FilterBitmap, b: &FilterBitmap) -> FilterBitmap {
97 a.iter().zip(b.iter()).map(|(x, y)| x & y).collect()
98}
99
100pub fn bitmap_or(a: &FilterBitmap, b: &FilterBitmap) -> FilterBitmap {
102 a.iter().zip(b.iter()).map(|(x, y)| x | y).collect()
103}
104
105pub fn bitmap_not(a: &FilterBitmap) -> FilterBitmap {
107 a.iter().map(|x| !x).collect()
108}
109
110pub fn filter_i64_gt_scalar(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
116 for (idx, &value) in data.iter().enumerate() {
117 if value > threshold {
118 set_bit(result, idx);
119 }
120 }
121}
122
123pub fn filter_i64_ge_scalar(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
125 for (idx, &value) in data.iter().enumerate() {
126 if value >= threshold {
127 set_bit(result, idx);
128 }
129 }
130}
131
132pub fn filter_i64_lt_scalar(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
134 for (idx, &value) in data.iter().enumerate() {
135 if value < threshold {
136 set_bit(result, idx);
137 }
138 }
139}
140
141pub fn filter_i64_eq_scalar(data: &[i64], target: i64, result: &mut FilterBitmap) {
143 for (idx, &value) in data.iter().enumerate() {
144 if value == target {
145 set_bit(result, idx);
146 }
147 }
148}
149
150pub fn filter_i64_between_scalar(data: &[i64], low: i64, high: i64, result: &mut FilterBitmap) {
152 for (idx, &value) in data.iter().enumerate() {
153 if value >= low && value <= high {
154 set_bit(result, idx);
155 }
156 }
157}
158
159pub fn filter_f64_gt_scalar(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
161 for (idx, &value) in data.iter().enumerate() {
162 if value > threshold {
163 set_bit(result, idx);
164 }
165 }
166}
167
168pub fn filter_f64_lt_scalar(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
170 for (idx, &value) in data.iter().enumerate() {
171 if value < threshold {
172 set_bit(result, idx);
173 }
174 }
175}
176
177#[cfg(target_arch = "x86_64")]
182mod avx2 {
183 use super::*;
184 use std::arch::x86_64::*;
185
186 #[inline]
188 pub fn is_available() -> bool {
189 is_x86_feature_detected!("avx2")
190 }
191
192 #[target_feature(enable = "avx2")]
196 pub unsafe fn filter_i64_gt_avx2(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
197 let threshold_vec = _mm256_set1_epi64x(threshold);
198 let len = data.len();
199 let chunks = len / 4;
200
201 for chunk_idx in 0..chunks {
202 let offset = chunk_idx * 4;
203 let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
204
205 let cmp = _mm256_cmpgt_epi64(data_vec, threshold_vec);
207
208 let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
210
211 let word_idx = offset / 64;
213 let bit_offset = offset % 64;
214 if word_idx < result.len() {
215 result[word_idx] |= mask << bit_offset;
216 if bit_offset > 60 && word_idx + 1 < result.len() {
218 result[word_idx + 1] |= mask >> (64 - bit_offset);
219 }
220 }
221 }
222
223 let remainder_start = chunks * 4;
225 for idx in remainder_start..len {
226 if data[idx] > threshold {
227 set_bit(result, idx);
228 }
229 }
230 }
231
232 #[target_feature(enable = "avx2")]
234 pub unsafe fn filter_i64_lt_avx2(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
235 let threshold_vec = _mm256_set1_epi64x(threshold);
236 let len = data.len();
237 let chunks = len / 4;
238
239 for chunk_idx in 0..chunks {
240 let offset = chunk_idx * 4;
241 let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
242
243 let cmp = _mm256_cmpgt_epi64(threshold_vec, data_vec);
245 let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
246
247 let word_idx = offset / 64;
248 let bit_offset = offset % 64;
249 if word_idx < result.len() {
250 result[word_idx] |= mask << bit_offset;
251 if bit_offset > 60 && word_idx + 1 < result.len() {
252 result[word_idx + 1] |= mask >> (64 - bit_offset);
253 }
254 }
255 }
256
257 let remainder_start = chunks * 4;
258 for idx in remainder_start..len {
259 if data[idx] < threshold {
260 set_bit(result, idx);
261 }
262 }
263 }
264
265 #[target_feature(enable = "avx2")]
267 pub unsafe fn filter_i64_eq_avx2(data: &[i64], target: i64, result: &mut FilterBitmap) {
268 let target_vec = _mm256_set1_epi64x(target);
269 let len = data.len();
270 let chunks = len / 4;
271
272 for chunk_idx in 0..chunks {
273 let offset = chunk_idx * 4;
274 let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
275
276 let cmp = _mm256_cmpeq_epi64(data_vec, target_vec);
277 let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
278
279 let word_idx = offset / 64;
280 let bit_offset = offset % 64;
281 if word_idx < result.len() {
282 result[word_idx] |= mask << bit_offset;
283 if bit_offset > 60 && word_idx + 1 < result.len() {
284 result[word_idx + 1] |= mask >> (64 - bit_offset);
285 }
286 }
287 }
288
289 let remainder_start = chunks * 4;
290 for idx in remainder_start..len {
291 if data[idx] == target {
292 set_bit(result, idx);
293 }
294 }
295 }
296
297 #[target_feature(enable = "avx2")]
299 pub unsafe fn filter_i64_between_avx2(
300 data: &[i64],
301 low: i64,
302 high: i64,
303 result: &mut FilterBitmap,
304 ) {
305 let low_vec = _mm256_set1_epi64x(low - 1); let high_vec = _mm256_set1_epi64x(high);
307 let len = data.len();
308 let chunks = len / 4;
309
310 for chunk_idx in 0..chunks {
311 let offset = chunk_idx * 4;
312 let data_vec = _mm256_loadu_si256(data.as_ptr().add(offset) as *const __m256i);
313
314 let cmp_low = _mm256_cmpgt_epi64(data_vec, low_vec);
316 let cmp_high = _mm256_cmpgt_epi64(high_vec, data_vec);
317 let cmp_high_eq = _mm256_cmpeq_epi64(data_vec, high_vec);
318 let cmp_high_final = _mm256_or_si256(cmp_high, cmp_high_eq);
319 let cmp = _mm256_and_si256(cmp_low, cmp_high_final);
320
321 let mask = _mm256_movemask_pd(_mm256_castsi256_pd(cmp)) as u64;
322
323 let word_idx = offset / 64;
324 let bit_offset = offset % 64;
325 if word_idx < result.len() {
326 result[word_idx] |= mask << bit_offset;
327 if bit_offset > 60 && word_idx + 1 < result.len() {
328 result[word_idx + 1] |= mask >> (64 - bit_offset);
329 }
330 }
331 }
332
333 let remainder_start = chunks * 4;
334 for idx in remainder_start..len {
335 let v = data[idx];
336 if v >= low && v <= high {
337 set_bit(result, idx);
338 }
339 }
340 }
341
342 #[target_feature(enable = "avx2")]
344 pub unsafe fn filter_f64_gt_avx2(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
345 let threshold_vec = _mm256_set1_pd(threshold);
346 let len = data.len();
347 let chunks = len / 4;
348
349 for chunk_idx in 0..chunks {
350 let offset = chunk_idx * 4;
351 let data_vec = _mm256_loadu_pd(data.as_ptr().add(offset));
352
353 let cmp = _mm256_cmp_pd(data_vec, threshold_vec, _CMP_GT_OQ);
354 let mask = _mm256_movemask_pd(cmp) as u64;
355
356 let word_idx = offset / 64;
357 let bit_offset = offset % 64;
358 if word_idx < result.len() {
359 result[word_idx] |= mask << bit_offset;
360 if bit_offset > 60 && word_idx + 1 < result.len() {
361 result[word_idx + 1] |= mask >> (64 - bit_offset);
362 }
363 }
364 }
365
366 let remainder_start = chunks * 4;
367 for idx in remainder_start..len {
368 if data[idx] > threshold {
369 set_bit(result, idx);
370 }
371 }
372 }
373}
374
#[cfg(target_arch = "aarch64")]
mod neon {
    //! AArch64 NEON kernels: 2 × 64-bit lanes per 128-bit vector.
    //!
    //! Each lane comparison yields all-ones or all-zeros; the two lane masks
    //! are collapsed into a 2-bit group that is OR-ed into the output bitmap.
    //! Chunk offsets are always even, so `bit_offset <= 62` and a 2-bit
    //! group never crosses a 64-bit word boundary — no carry into the next
    //! word is needed (unlike the AVX2 path's defensive spill branch).
    use super::*;
    use std::arch::aarch64::*;

    /// NEON is a mandatory feature of the AArch64 target, so this is
    /// unconditionally true; kept for symmetry with `avx2::is_available`.
    #[inline]
    #[allow(dead_code)]
    pub fn is_available() -> bool {
        true
    }

    /// Sets bit `i` of `result` for every `data[i] > threshold`.
    ///
    /// # Safety
    /// Requires NEON support (always present on AArch64 targets).
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_i64_gt_neon(data: &[i64], threshold: i64, result: &mut FilterBitmap) { unsafe {
        let threshold_vec = vdupq_n_s64(threshold);
        let len = data.len();
        // Two i64 lanes per 128-bit vector.
        let chunks = len / 2;

        for chunk_idx in 0..chunks {
            let offset = chunk_idx * 2;
            let data_vec = vld1q_s64(data.as_ptr().add(offset));

            // Per-lane mask: all-ones where data > threshold.
            let cmp = vcgtq_s64(data_vec, threshold_vec);

            // Collapse each 64-bit lane mask down to a single bit.
            let mask_low = vgetq_lane_u64(cmp, 0);
            let mask_high = vgetq_lane_u64(cmp, 1);
            let mask = ((mask_low != 0) as u64) | (((mask_high != 0) as u64) << 1);

            let word_idx = offset / 64;
            let bit_offset = offset % 64;
            if word_idx < result.len() {
                result[word_idx] |= mask << bit_offset;
            }
        }

        // Scalar tail handles the final element when len is odd.
        let remainder_start = chunks * 2;
        for idx in remainder_start..len {
            if data[idx] > threshold {
                set_bit(result, idx);
            }
        }
    }}

    /// Sets bit `i` of `result` for every `data[i] < threshold`.
    ///
    /// # Safety
    /// Requires NEON support (always present on AArch64 targets).
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_i64_lt_neon(data: &[i64], threshold: i64, result: &mut FilterBitmap) { unsafe {
        let threshold_vec = vdupq_n_s64(threshold);
        let len = data.len();
        let chunks = len / 2;

        for chunk_idx in 0..chunks {
            let offset = chunk_idx * 2;
            let data_vec = vld1q_s64(data.as_ptr().add(offset));

            // Per-lane mask: all-ones where data < threshold.
            let cmp = vcltq_s64(data_vec, threshold_vec);

            // Collapse each 64-bit lane mask down to a single bit.
            let mask_low = vgetq_lane_u64(cmp, 0);
            let mask_high = vgetq_lane_u64(cmp, 1);
            let mask = ((mask_low != 0) as u64) | (((mask_high != 0) as u64) << 1);

            let word_idx = offset / 64;
            let bit_offset = offset % 64;
            if word_idx < result.len() {
                result[word_idx] |= mask << bit_offset;
            }
        }

        // Scalar tail handles the final element when len is odd.
        let remainder_start = chunks * 2;
        for idx in remainder_start..len {
            if data[idx] < threshold {
                set_bit(result, idx);
            }
        }
    }}

    /// Sets bit `i` of `result` for every `data[i] == target`.
    ///
    /// # Safety
    /// Requires NEON support (always present on AArch64 targets).
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_i64_eq_neon(data: &[i64], target: i64, result: &mut FilterBitmap) { unsafe {
        let target_vec = vdupq_n_s64(target);
        let len = data.len();
        let chunks = len / 2;

        for chunk_idx in 0..chunks {
            let offset = chunk_idx * 2;
            let data_vec = vld1q_s64(data.as_ptr().add(offset));

            // Per-lane mask: all-ones where data == target.
            let cmp = vceqq_s64(data_vec, target_vec);

            // Collapse each 64-bit lane mask down to a single bit.
            let mask_low = vgetq_lane_u64(cmp, 0);
            let mask_high = vgetq_lane_u64(cmp, 1);
            let mask = ((mask_low != 0) as u64) | (((mask_high != 0) as u64) << 1);

            let word_idx = offset / 64;
            let bit_offset = offset % 64;
            if word_idx < result.len() {
                result[word_idx] |= mask << bit_offset;
            }
        }

        // Scalar tail handles the final element when len is odd.
        let remainder_start = chunks * 2;
        for idx in remainder_start..len {
            if data[idx] == target {
                set_bit(result, idx);
            }
        }
    }}

    /// Sets bit `i` of `result` for every `data[i] > threshold`.
    /// NaN lanes compare false, so their bits stay clear.
    ///
    /// # Safety
    /// Requires NEON support (always present on AArch64 targets).
    #[target_feature(enable = "neon")]
    pub unsafe fn filter_f64_gt_neon(data: &[f64], threshold: f64, result: &mut FilterBitmap) { unsafe {
        let threshold_vec = vdupq_n_f64(threshold);
        let len = data.len();
        let chunks = len / 2;

        for chunk_idx in 0..chunks {
            let offset = chunk_idx * 2;
            let data_vec = vld1q_f64(data.as_ptr().add(offset));

            // Per-lane mask: all-ones where data > threshold.
            let cmp = vcgtq_f64(data_vec, threshold_vec);

            // Collapse each 64-bit lane mask down to a single bit.
            let mask_low = vgetq_lane_u64(cmp, 0);
            let mask_high = vgetq_lane_u64(cmp, 1);
            let mask = ((mask_low != 0) as u64) | (((mask_high != 0) as u64) << 1);

            let word_idx = offset / 64;
            let bit_offset = offset % 64;
            if word_idx < result.len() {
                result[word_idx] |= mask << bit_offset;
            }
        }

        // Scalar tail handles the final element when len is odd.
        let remainder_start = chunks * 2;
        for idx in remainder_start..len {
            if data[idx] > threshold {
                set_bit(result, idx);
            }
        }
    }}
}
526
527pub fn filter_i64_gt(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
533 #[cfg(target_arch = "x86_64")]
534 {
535 if avx2::is_available() {
536 unsafe {
537 avx2::filter_i64_gt_avx2(data, threshold, result);
538 }
539 return;
540 }
541 }
542
543 #[cfg(target_arch = "aarch64")]
544 {
545 unsafe {
546 neon::filter_i64_gt_neon(data, threshold, result);
547 }
548 return;
549 }
550
551 #[allow(unreachable_code)]
552 filter_i64_gt_scalar(data, threshold, result);
553}
554
555pub fn filter_i64_lt(data: &[i64], threshold: i64, result: &mut FilterBitmap) {
557 #[cfg(target_arch = "x86_64")]
558 {
559 if avx2::is_available() {
560 unsafe {
561 avx2::filter_i64_lt_avx2(data, threshold, result);
562 }
563 return;
564 }
565 }
566
567 #[cfg(target_arch = "aarch64")]
568 {
569 unsafe {
570 neon::filter_i64_lt_neon(data, threshold, result);
571 }
572 return;
573 }
574
575 #[allow(unreachable_code)]
576 filter_i64_lt_scalar(data, threshold, result);
577}
578
579pub fn filter_i64_eq(data: &[i64], target: i64, result: &mut FilterBitmap) {
581 #[cfg(target_arch = "x86_64")]
582 {
583 if avx2::is_available() {
584 unsafe {
585 avx2::filter_i64_eq_avx2(data, target, result);
586 }
587 return;
588 }
589 }
590
591 #[cfg(target_arch = "aarch64")]
592 {
593 unsafe {
594 neon::filter_i64_eq_neon(data, target, result);
595 }
596 return;
597 }
598
599 #[allow(unreachable_code)]
600 filter_i64_eq_scalar(data, target, result);
601}
602
603pub fn filter_i64_between(data: &[i64], low: i64, high: i64, result: &mut FilterBitmap) {
605 #[cfg(target_arch = "x86_64")]
606 {
607 if avx2::is_available() {
608 unsafe {
609 avx2::filter_i64_between_avx2(data, low, high, result);
610 }
611 return;
612 }
613 }
614
615 filter_i64_between_scalar(data, low, high, result);
617}
618
619pub fn filter_f64_gt(data: &[f64], threshold: f64, result: &mut FilterBitmap) {
621 #[cfg(target_arch = "x86_64")]
622 {
623 if avx2::is_available() {
624 unsafe {
625 avx2::filter_f64_gt_avx2(data, threshold, result);
626 }
627 return;
628 }
629 }
630
631 #[cfg(target_arch = "aarch64")]
632 {
633 unsafe {
634 neon::filter_f64_gt_neon(data, threshold, result);
635 }
636 return;
637 }
638
639 #[allow(unreachable_code)]
640 filter_f64_gt_scalar(data, threshold, result);
641}
642
643pub fn simd_info() -> SimdInfo {
645 SimdInfo {
646 #[cfg(target_arch = "x86_64")]
647 has_avx2: is_x86_feature_detected!("avx2"),
648 #[cfg(target_arch = "x86_64")]
649 has_avx512f: is_x86_feature_detected!("avx512f"),
650 #[cfg(not(target_arch = "x86_64"))]
651 has_avx2: false,
652 #[cfg(not(target_arch = "x86_64"))]
653 has_avx512f: false,
654 #[cfg(target_arch = "aarch64")]
655 has_neon: true,
656 #[cfg(not(target_arch = "aarch64"))]
657 has_neon: false,
658 }
659}
660
/// Snapshot of the SIMD capabilities detected on the current CPU.
#[derive(Debug, Clone)]
pub struct SimdInfo {
    pub has_avx2: bool,
    pub has_avx512f: bool,
    pub has_neon: bool,
}

impl SimdInfo {
    /// Rough expected speedup of the i64 filter kernels relative to the
    /// scalar path, based on lane width: 8 lanes (AVX-512), 4 (AVX2),
    /// 2 (NEON), or 1 (scalar).
    pub fn expected_speedup_i64(&self) -> f64 {
        match (self.has_avx512f, self.has_avx2, self.has_neon) {
            (true, _, _) => 8.0,
            (false, true, _) => 4.0,
            (false, false, true) => 2.0,
            (false, false, false) => 1.0,
        }
    }
}
683
#[cfg(test)]
mod tests {
    use super::*;

    /// Strictly-greater filter over 0..=99 with threshold 50: rows
    /// 51..=99 match (49 rows).
    #[test]
    fn test_filter_i64_gt() {
        let values: Vec<i64> = (0..100).collect();
        let mut bits = allocate_bitmap(values.len());

        filter_i64_gt(&values, 50, &mut bits);

        assert_eq!(popcount(&bits), 49);
        for row in 0..100 {
            assert_eq!(get_bit(&bits, row), row > 50, "Failed at index {}", row);
        }
    }

    /// Strictly-less filter: rows 0..=49 match (50 rows).
    #[test]
    fn test_filter_i64_lt() {
        let values: Vec<i64> = (0..100).collect();
        let mut bits = allocate_bitmap(values.len());

        filter_i64_lt(&values, 50, &mut bits);

        assert_eq!(popcount(&bits), 50);
        for row in 0..100 {
            assert_eq!(get_bit(&bits, row), row < 50, "Failed at index {}", row);
        }
    }

    /// Equality filter: exactly one row matches.
    #[test]
    fn test_filter_i64_eq() {
        let values: Vec<i64> = (0..100).collect();
        let mut bits = allocate_bitmap(values.len());

        filter_i64_eq(&values, 42, &mut bits);

        assert_eq!(popcount(&bits), 1);
        assert!(get_bit(&bits, 42));
    }

    /// Inclusive range filter: 25..=75 is 51 rows.
    #[test]
    fn test_filter_i64_between() {
        let values: Vec<i64> = (0..100).collect();
        let mut bits = allocate_bitmap(values.len());

        filter_i64_between(&values, 25, 75, &mut bits);

        assert_eq!(popcount(&bits), 51);
        for row in 0..100 {
            assert_eq!(
                get_bit(&bits, row),
                (25..=75).contains(&row),
                "Failed at index {}",
                row
            );
        }
    }

    /// f64 strictly-greater filter mirrors the i64 case.
    #[test]
    fn test_filter_f64_gt() {
        let values: Vec<f64> = (0..100).map(f64::from).collect();
        let mut bits = allocate_bitmap(values.len());

        filter_f64_gt(&values, 50.0, &mut bits);

        assert_eq!(popcount(&bits), 49);
    }

    /// AND/OR over overlapping bit ranges: 0..32 and 16..48 overlap on
    /// 16..32 (16 bits); their union is 0..48 (48 bits).
    #[test]
    fn test_bitmap_operations() {
        let mut first = allocate_bitmap(64);
        let mut second = allocate_bitmap(64);

        for bit in 0..32 {
            set_bit(&mut first, bit);
        }
        for bit in 16..48 {
            set_bit(&mut second, bit);
        }

        assert_eq!(popcount(&bitmap_and(&first, &second)), 16);
        assert_eq!(popcount(&bitmap_or(&first, &second)), 48);
    }

    /// Smoke test: capability detection runs and prints its findings.
    #[test]
    fn test_simd_info() {
        let info = simd_info();
        println!("SIMD capabilities: {:?}", info);
        println!("Expected speedup: {}x", info.expected_speedup_i64());
    }

    /// 1M-row filter: values 500_001..=999_999 match (499_999 rows).
    #[test]
    fn test_large_dataset() {
        let values: Vec<i64> = (0..1_000_000).collect();
        let mut bits = allocate_bitmap(values.len());

        let start = std::time::Instant::now();
        filter_i64_gt(&values, 500_000, &mut bits);
        let elapsed = start.elapsed();

        assert_eq!(popcount(&bits), 499_999);
        println!("Filtered 1M rows in {:?}", elapsed);
    }
}