1use crate::format::BlockStats;
12
13#[derive(Debug, Clone)]
15pub enum PredicateValue {
16 Numeric(f64),
18 String(String),
20}
21
22#[derive(Debug, Clone)]
24pub struct ScanPredicate {
25 pub col_idx: usize,
27 pub op: PredicateOp,
29 pub value: PredicateValue,
31}
32
33#[derive(Debug, Clone, Copy, PartialEq, Eq)]
35pub enum PredicateOp {
36 Gt,
38 Gte,
40 Lt,
42 Lte,
44 Eq,
46 Ne,
48}
49
50impl ScanPredicate {
51 pub fn gt(col_idx: usize, value: f64) -> Self {
55 Self {
56 col_idx,
57 op: PredicateOp::Gt,
58 value: PredicateValue::Numeric(value),
59 }
60 }
61
62 pub fn gte(col_idx: usize, value: f64) -> Self {
64 Self {
65 col_idx,
66 op: PredicateOp::Gte,
67 value: PredicateValue::Numeric(value),
68 }
69 }
70
71 pub fn lt(col_idx: usize, value: f64) -> Self {
73 Self {
74 col_idx,
75 op: PredicateOp::Lt,
76 value: PredicateValue::Numeric(value),
77 }
78 }
79
80 pub fn lte(col_idx: usize, value: f64) -> Self {
82 Self {
83 col_idx,
84 op: PredicateOp::Lte,
85 value: PredicateValue::Numeric(value),
86 }
87 }
88
89 pub fn eq(col_idx: usize, value: f64) -> Self {
91 Self {
92 col_idx,
93 op: PredicateOp::Eq,
94 value: PredicateValue::Numeric(value),
95 }
96 }
97
98 pub fn not_eq(col_idx: usize, value: f64) -> Self {
101 Self {
102 col_idx,
103 op: PredicateOp::Ne,
104 value: PredicateValue::Numeric(value),
105 }
106 }
107
108 pub fn str_eq(col_idx: usize, value: impl Into<String>) -> Self {
112 Self {
113 col_idx,
114 op: PredicateOp::Eq,
115 value: PredicateValue::String(value.into()),
116 }
117 }
118
119 pub fn str_not_eq(col_idx: usize, value: impl Into<String>) -> Self {
121 Self {
122 col_idx,
123 op: PredicateOp::Ne,
124 value: PredicateValue::String(value.into()),
125 }
126 }
127
128 pub fn str_gt(col_idx: usize, value: impl Into<String>) -> Self {
130 Self {
131 col_idx,
132 op: PredicateOp::Gt,
133 value: PredicateValue::String(value.into()),
134 }
135 }
136
137 pub fn str_gte(col_idx: usize, value: impl Into<String>) -> Self {
139 Self {
140 col_idx,
141 op: PredicateOp::Gte,
142 value: PredicateValue::String(value.into()),
143 }
144 }
145
146 pub fn str_lt(col_idx: usize, value: impl Into<String>) -> Self {
148 Self {
149 col_idx,
150 op: PredicateOp::Lt,
151 value: PredicateValue::String(value.into()),
152 }
153 }
154
155 pub fn str_lte(col_idx: usize, value: impl Into<String>) -> Self {
157 Self {
158 col_idx,
159 op: PredicateOp::Lte,
160 value: PredicateValue::String(value.into()),
161 }
162 }
163
164 pub fn can_skip_block(&self, stats: &BlockStats) -> bool {
171 match &self.value {
172 PredicateValue::Numeric(v) => can_skip_numeric(self.op, *v, stats),
173 PredicateValue::String(v) => can_skip_string(self.op, v, stats),
174 }
175 }
176}
177
178fn can_skip_numeric(op: PredicateOp, value: f64, stats: &BlockStats) -> bool {
180 if stats.min.is_nan() || stats.max.is_nan() {
182 return false;
183 }
184
185 match op {
186 PredicateOp::Gt => stats.max <= value,
188 PredicateOp::Gte => stats.max < value,
190 PredicateOp::Lt => stats.min >= value,
192 PredicateOp::Lte => stats.min > value,
194 PredicateOp::Eq => value < stats.min || value > stats.max,
196 PredicateOp::Ne => stats.min == value && stats.max == value,
198 }
199}
200
201fn can_skip_string(op: PredicateOp, value: &str, stats: &BlockStats) -> bool {
203 let (Some(smin), Some(smax)) = (&stats.str_min, &stats.str_max) else {
204 return false;
206 };
207
208 let skip_by_range = match op {
209 PredicateOp::Gt => smax.as_str() <= value,
211 PredicateOp::Gte => smax.as_str() < value,
213 PredicateOp::Lt => smin.as_str() >= value,
215 PredicateOp::Lte => smin.as_str() > value,
217 PredicateOp::Eq => value < smin.as_str() || value > smax.as_str(),
219 PredicateOp::Ne => smin.as_str() == value && smax.as_str() == value,
221 };
222
223 if skip_by_range {
224 return true;
225 }
226
227 if op == PredicateOp::Eq
229 && let Some(ref bloom_bytes) = stats.bloom
230 && !bloom_may_contain(bloom_bytes, value)
231 {
232 return true; }
234
235 false
236}
237
238pub const BLOOM_BYTES: usize = 256;
242
243const BLOOM_HASH_COUNT: u32 = 3;
245
246const FNV_OFFSET: u64 = 14_695_981_039_346_656_037;
248const FNV_PRIME: u64 = 1_099_511_628_211;
250
251fn bloom_bit_pos(value: &str, hash_idx: u32) -> usize {
256 let mut hash = FNV_OFFSET ^ (hash_idx as u64).wrapping_mul(FNV_PRIME);
258 for byte in value.bytes() {
259 hash ^= byte as u64;
260 hash = hash.wrapping_mul(FNV_PRIME);
261 }
262 (hash as usize) & (BLOOM_BYTES * 8 - 1)
264}
265
266pub fn bloom_insert(bloom: &mut [u8], value: &str) {
270 for i in 0..BLOOM_HASH_COUNT {
271 let bit = bloom_bit_pos(value, i);
272 bloom[bit / 8] |= 1 << (bit % 8);
273 }
274}
275
276pub fn bloom_may_contain(bloom: &[u8], value: &str) -> bool {
281 if bloom.len() < BLOOM_BYTES {
282 return true;
285 }
286 for i in 0..BLOOM_HASH_COUNT {
287 let bit = bloom_bit_pos(value, i);
288 if bloom[bit / 8] & (1 << (bit % 8)) == 0 {
289 return false;
290 }
291 }
292 true
293}
294
295pub fn build_bloom(values: &[&str]) -> Vec<u8> {
299 let mut bloom = vec![0u8; BLOOM_BYTES];
300 for v in values {
301 bloom_insert(&mut bloom, v);
302 }
303 bloom
304}
305
306#[cfg(test)]
307mod tests {
308 use super::*;
309
310 fn stats(min: f64, max: f64) -> BlockStats {
311 BlockStats::numeric(min, max, 0, 1024)
312 }
313
314 #[test]
315 fn gt_predicate() {
316 let pred = ScanPredicate::gt(0, 50.0);
317 assert!(pred.can_skip_block(&stats(10.0, 40.0)));
319 assert!(!pred.can_skip_block(&stats(10.0, 60.0)));
321 assert!(pred.can_skip_block(&stats(10.0, 50.0)));
323 }
324
325 #[test]
326 fn gte_predicate() {
327 let pred = ScanPredicate::gte(0, 50.0);
328 assert!(pred.can_skip_block(&stats(10.0, 49.0)));
330 assert!(!pred.can_skip_block(&stats(10.0, 50.0)));
332 }
333
334 #[test]
335 fn lt_predicate() {
336 let pred = ScanPredicate::lt(0, 50.0);
337 assert!(pred.can_skip_block(&stats(60.0, 100.0)));
339 assert!(!pred.can_skip_block(&stats(40.0, 100.0)));
341 }
342
343 #[test]
344 fn eq_predicate() {
345 let pred = ScanPredicate::eq(0, 50.0);
346 assert!(pred.can_skip_block(&stats(10.0, 40.0)));
348 assert!(pred.can_skip_block(&stats(60.0, 100.0)));
350 assert!(!pred.can_skip_block(&stats(40.0, 60.0)));
352 }
353
354 #[test]
355 fn ne_predicate() {
356 let pred = ScanPredicate::not_eq(0, 50.0);
357 assert!(pred.can_skip_block(&stats(50.0, 50.0)));
359 assert!(!pred.can_skip_block(&stats(40.0, 60.0)));
361 }
362
363 #[test]
364 fn non_numeric_never_skipped_by_numeric_pred() {
365 let pred = ScanPredicate::gt(0, 50.0);
366 let nan_stats = BlockStats::non_numeric(0, 1024);
367 assert!(!pred.can_skip_block(&nan_stats));
368 }
369
370 fn str_stats(smin: &str, smax: &str) -> BlockStats {
373 BlockStats::string_block(0, 1024, Some(smin.into()), Some(smax.into()), None)
374 }
375
376 #[test]
377 fn string_eq_skip_below_range() {
378 let stats = str_stats("apple", "banana");
380 assert!(ScanPredicate::str_eq(0, "aaa").can_skip_block(&stats));
381 }
382
383 #[test]
384 fn string_eq_skip_above_range() {
385 let stats = str_stats("apple", "banana");
387 assert!(ScanPredicate::str_eq(0, "zzz").can_skip_block(&stats));
388 }
389
390 #[test]
391 fn string_eq_no_skip_in_range() {
392 let stats = str_stats("apple", "banana");
394 assert!(!ScanPredicate::str_eq(0, "avocado").can_skip_block(&stats));
395 }
396
397 #[test]
398 fn string_gt_skip() {
399 let stats = str_stats("apple", "fig");
401 assert!(ScanPredicate::str_gt(0, "fig").can_skip_block(&stats));
402 assert!(!ScanPredicate::str_gt(0, "egg").can_skip_block(&stats));
404 }
405
406 #[test]
407 fn string_lt_skip() {
408 let stats = str_stats("mango", "pear");
410 assert!(ScanPredicate::str_lt(0, "mango").can_skip_block(&stats));
411 assert!(!ScanPredicate::str_lt(0, "orange").can_skip_block(&stats));
413 }
414
415 #[test]
416 fn string_gte_skip() {
417 let stats = str_stats("ant", "cat");
419 assert!(ScanPredicate::str_gte(0, "dog").can_skip_block(&stats));
420 assert!(!ScanPredicate::str_gte(0, "cat").can_skip_block(&stats));
421 }
422
423 #[test]
424 fn string_lte_skip() {
425 let stats = str_stats("zebra", "zoo");
427 assert!(ScanPredicate::str_lte(0, "yak").can_skip_block(&stats));
428 assert!(!ScanPredicate::str_lte(0, "zebra").can_skip_block(&stats));
429 }
430
431 #[test]
432 fn string_ne_skip() {
433 let stats = str_stats("exact", "exact");
435 assert!(ScanPredicate::str_not_eq(0, "exact").can_skip_block(&stats));
436 let stats2 = str_stats("a", "z");
438 assert!(!ScanPredicate::str_not_eq(0, "exact").can_skip_block(&stats2));
439 }
440
441 #[test]
442 fn string_no_zone_map_no_skip() {
443 let stats = BlockStats::non_numeric(0, 1024);
445 assert!(!ScanPredicate::str_eq(0, "anything").can_skip_block(&stats));
446 }
447
448 #[test]
451 fn bloom_insert_and_query() {
452 let values = ["hello", "world", "foo"];
453 let bloom = build_bloom(&values);
454 assert!(bloom_may_contain(&bloom, "hello"));
455 assert!(bloom_may_contain(&bloom, "world"));
456 assert!(bloom_may_contain(&bloom, "foo"));
457 }
458
459 #[test]
460 fn bloom_absent_value_rejected() {
461 let values = ["alpha", "beta", "gamma"];
465 let bloom = build_bloom(&values);
466 let delta_present = bloom_may_contain(&bloom, "delta");
469 if !delta_present {
474 assert!(!bloom_may_contain(&bloom, "delta"));
475 }
476 }
477
478 #[test]
479 fn bloom_eq_skip_via_filter() {
480 let bloom = build_bloom(&["apple", "apricot", "banana"]);
483 let stats = BlockStats::string_block(
486 0,
487 1024,
488 Some("apple".into()),
489 Some("banana".into()),
490 Some(bloom.clone()),
491 );
492 let absent = !bloom_may_contain(&bloom, "avocado");
494 if absent {
495 assert!(ScanPredicate::str_eq(0, "avocado").can_skip_block(&stats));
496 }
497 assert!(!ScanPredicate::str_eq(0, "apple").can_skip_block(&stats));
499 }
500}