Skip to main content

cjc_data/
column_meta.rs

1//! Column-level metadata for query optimization.
2//!
3//! Zone maps (min/max per column) and sorted flags enable
4//! the TidyView optimizer to skip unnecessary work.
5
6use crate::Column;
7use std::collections::BTreeSet;
8
9/// Comparison operator for filter predicates.
10#[derive(Debug, Clone, Copy, PartialEq, Eq)]
11pub enum CompOp {
12    Gt,
13    Lt,
14    Ge,
15    Le,
16    Eq,
17}
18
19/// Statistical metadata for a single column.
20#[derive(Debug, Clone)]
21pub struct ColumnStats {
22    /// Minimum value (for numeric columns)
23    pub min_f64: Option<f64>,
24    /// Maximum value (for numeric columns)
25    pub max_f64: Option<f64>,
26    /// Minimum value (for int columns)
27    pub min_i64: Option<i64>,
28    /// Maximum value (for int columns)
29    pub max_i64: Option<i64>,
30    /// Whether the column is sorted ascending
31    pub sorted_asc: bool,
32    /// Whether the column is sorted descending
33    pub sorted_desc: bool,
34    /// Number of distinct values (exact for small columns, estimated for large)
35    pub n_distinct: Option<usize>,
36    /// Number of null/NaN values
37    pub n_null: usize,
38    /// Total number of rows
39    pub nrows: usize,
40}
41
42impl ColumnStats {
43    /// Compute stats for a column.
44    pub fn compute(col: &Column) -> Self {
45        match col {
46            Column::Float(v) => Self::compute_float(v),
47            Column::Int(v) => Self::compute_int(v),
48            Column::Str(v) => Self::compute_str(v),
49            Column::Bool(v) => Self::compute_bool(v),
50            Column::Categorical { codes, levels } => Self::compute_categorical(codes, levels),
51            Column::DateTime(v) => Self::compute_datetime(v),
52        }
53    }
54
55    fn compute_float(v: &[f64]) -> Self {
56        if v.is_empty() {
57            return Self::empty();
58        }
59
60        let mut min = f64::INFINITY;
61        let mut max = f64::NEG_INFINITY;
62        let mut n_null = 0usize;
63        let mut sorted_asc = true;
64        let mut sorted_desc = true;
65        let mut prev: Option<f64> = None;
66
67        for &val in v {
68            if val.is_nan() {
69                n_null += 1;
70                // NaN breaks sort order
71                if prev.is_some() {
72                    sorted_asc = false;
73                    sorted_desc = false;
74                }
75            } else {
76                if val < min {
77                    min = val;
78                }
79                if val > max {
80                    max = val;
81                }
82                if let Some(p) = prev {
83                    if !p.is_nan() {
84                        if val < p {
85                            sorted_asc = false;
86                        }
87                        if val > p {
88                            sorted_desc = false;
89                        }
90                    }
91                }
92            }
93            prev = Some(val);
94        }
95
96        ColumnStats {
97            min_f64: if min.is_finite() { Some(min) } else { None },
98            max_f64: if max.is_finite() { Some(max) } else { None },
99            min_i64: None,
100            max_i64: None,
101            sorted_asc,
102            sorted_desc,
103            n_distinct: None,
104            n_null,
105            nrows: v.len(),
106        }
107    }
108
109    fn compute_int(v: &[i64]) -> Self {
110        if v.is_empty() {
111            return Self::empty();
112        }
113
114        let mut min = i64::MAX;
115        let mut max = i64::MIN;
116        let mut sorted_asc = true;
117        let mut sorted_desc = true;
118
119        for i in 0..v.len() {
120            let val = v[i];
121            if val < min {
122                min = val;
123            }
124            if val > max {
125                max = val;
126            }
127            if i > 0 {
128                if val < v[i - 1] {
129                    sorted_asc = false;
130                }
131                if val > v[i - 1] {
132                    sorted_desc = false;
133                }
134            }
135        }
136
137        ColumnStats {
138            min_f64: None,
139            max_f64: None,
140            min_i64: Some(min),
141            max_i64: Some(max),
142            sorted_asc,
143            sorted_desc,
144            n_distinct: None,
145            n_null: 0,
146            nrows: v.len(),
147        }
148    }
149
150    fn compute_str(v: &[String]) -> Self {
151        if v.is_empty() {
152            return Self::empty();
153        }
154
155        let mut sorted_asc = true;
156        let mut sorted_desc = true;
157        // Use BTreeSet for deterministic distinct counting
158        let mut distinct = BTreeSet::new();
159
160        for i in 0..v.len() {
161            distinct.insert(v[i].as_str());
162            if i > 0 {
163                if v[i] < v[i - 1] {
164                    sorted_asc = false;
165                }
166                if v[i] > v[i - 1] {
167                    sorted_desc = false;
168                }
169            }
170        }
171
172        ColumnStats {
173            min_f64: None,
174            max_f64: None,
175            min_i64: None,
176            max_i64: None,
177            sorted_asc,
178            sorted_desc,
179            n_distinct: Some(distinct.len()),
180            n_null: 0,
181            nrows: v.len(),
182        }
183    }
184
185    fn compute_bool(v: &[bool]) -> Self {
186        if v.is_empty() {
187            return Self::empty();
188        }
189
190        let mut sorted_asc = true;
191        let mut sorted_desc = true;
192        let mut has_true = false;
193        let mut has_false = false;
194
195        for i in 0..v.len() {
196            if v[i] {
197                has_true = true;
198            } else {
199                has_false = true;
200            }
201            if i > 0 {
202                // false < true in Rust's Ord
203                if v[i] < v[i - 1] {
204                    sorted_asc = false;
205                }
206                if v[i] > v[i - 1] {
207                    sorted_desc = false;
208                }
209            }
210        }
211
212        let n_distinct = match (has_true, has_false) {
213            (true, true) => 2,
214            (true, false) | (false, true) => 1,
215            _ => 0,
216        };
217
218        ColumnStats {
219            min_f64: None,
220            max_f64: None,
221            min_i64: None,
222            max_i64: None,
223            sorted_asc,
224            sorted_desc,
225            n_distinct: Some(n_distinct),
226            n_null: 0,
227            nrows: v.len(),
228        }
229    }
230
231    fn compute_categorical(codes: &[u32], levels: &[String]) -> Self {
232        if codes.is_empty() {
233            return Self::empty();
234        }
235
236        let mut min = u32::MAX;
237        let mut max = u32::MIN;
238        let mut sorted_asc = true;
239        let mut sorted_desc = true;
240        let mut distinct = BTreeSet::new();
241
242        for i in 0..codes.len() {
243            let c = codes[i];
244            if c < min {
245                min = c;
246            }
247            if c > max {
248                max = c;
249            }
250            distinct.insert(c);
251            if i > 0 {
252                if c < codes[i - 1] {
253                    sorted_asc = false;
254                }
255                if c > codes[i - 1] {
256                    sorted_desc = false;
257                }
258            }
259        }
260
261        ColumnStats {
262            min_f64: None,
263            max_f64: None,
264            min_i64: Some(min as i64),
265            max_i64: Some(max as i64),
266            sorted_asc,
267            sorted_desc,
268            n_distinct: Some(distinct.len().min(levels.len())),
269            n_null: 0,
270            nrows: codes.len(),
271        }
272    }
273
274    fn compute_datetime(v: &[i64]) -> Self {
275        // DateTime columns store epoch millis as i64; reuse int logic.
276        Self::compute_int(v)
277    }
278
279    fn empty() -> Self {
280        ColumnStats {
281            min_f64: None,
282            max_f64: None,
283            min_i64: None,
284            max_i64: None,
285            sorted_asc: true,  // vacuously sorted
286            sorted_desc: true, // vacuously sorted
287            n_distinct: Some(0),
288            n_null: 0,
289            nrows: 0,
290        }
291    }
292
293    // ── Zone-map skip predicates ──────────────────────────────────────
294
295    /// Check if a filter `col > threshold` can be ruled out (all values <= threshold).
296    pub fn can_skip_gt(&self, threshold: f64) -> bool {
297        self.max_f64.map_or(false, |max| max <= threshold)
298    }
299
300    /// Check if a filter `col < threshold` can be ruled out (all values >= threshold).
301    pub fn can_skip_lt(&self, threshold: f64) -> bool {
302        self.min_f64.map_or(false, |min| min >= threshold)
303    }
304
305    /// Check if a filter `col >= threshold` can be ruled out (all values < threshold).
306    pub fn can_skip_ge(&self, threshold: f64) -> bool {
307        self.max_f64.map_or(false, |max| max < threshold)
308    }
309
310    /// Check if a filter `col <= threshold` can be ruled out (all values > threshold).
311    pub fn can_skip_le(&self, threshold: f64) -> bool {
312        self.min_f64.map_or(false, |min| min > threshold)
313    }
314
315    /// Check if a filter `col == value` can be ruled out (value outside [min, max]).
316    pub fn can_skip_eq_f64(&self, value: f64) -> bool {
317        match (self.min_f64, self.max_f64) {
318            (Some(min), Some(max)) => value < min || value > max,
319            _ => false,
320        }
321    }
322
323    /// Check if a filter `col > threshold` can be ruled out for int columns.
324    pub fn can_skip_gt_i64(&self, threshold: i64) -> bool {
325        self.max_i64.map_or(false, |max| max <= threshold)
326    }
327
328    /// Check if a filter `col < threshold` can be ruled out for int columns.
329    pub fn can_skip_lt_i64(&self, threshold: i64) -> bool {
330        self.min_i64.map_or(false, |min| min >= threshold)
331    }
332
333    /// Check if a filter `col == value` can be ruled out for int columns.
334    pub fn can_skip_eq_i64(&self, value: i64) -> bool {
335        match (self.min_i64, self.max_i64) {
336            (Some(min), Some(max)) => value < min || value > max,
337            _ => false,
338        }
339    }
340
341    // ── Sorted-column helpers ─────────────────────────────────────────
342
343    /// Check if column is sorted (ascending or descending).
344    pub fn is_sorted(&self) -> bool {
345        self.sorted_asc || self.sorted_desc
346    }
347
348    /// For a sorted ascending float column, find the row range satisfying a
349    /// comparison predicate via binary search. Returns `(start, end)` where
350    /// matching rows are `start..end`.
351    ///
352    /// Caller must ensure the slice is actually sorted ascending.
353    pub fn binary_search_range_f64(col: &[f64], op: CompOp, threshold: f64) -> (usize, usize) {
354        match op {
355            CompOp::Gt => {
356                let start = col.partition_point(|&v| v <= threshold);
357                (start, col.len())
358            }
359            CompOp::Ge => {
360                let start = col.partition_point(|&v| v < threshold);
361                (start, col.len())
362            }
363            CompOp::Lt => {
364                let end = col.partition_point(|&v| v < threshold);
365                (0, end)
366            }
367            CompOp::Le => {
368                let end = col.partition_point(|&v| v <= threshold);
369                (0, end)
370            }
371            CompOp::Eq => {
372                let start = col.partition_point(|&v| v < threshold);
373                let end = col.partition_point(|&v| v <= threshold);
374                (start, end)
375            }
376        }
377    }
378
379    /// For a sorted ascending int column, find the row range satisfying a
380    /// comparison predicate via binary search. Returns `(start, end)`.
381    pub fn binary_search_range_i64(col: &[i64], op: CompOp, threshold: i64) -> (usize, usize) {
382        match op {
383            CompOp::Gt => {
384                let start = col.partition_point(|&v| v <= threshold);
385                (start, col.len())
386            }
387            CompOp::Ge => {
388                let start = col.partition_point(|&v| v < threshold);
389                (start, col.len())
390            }
391            CompOp::Lt => {
392                let end = col.partition_point(|&v| v < threshold);
393                (0, end)
394            }
395            CompOp::Le => {
396                let end = col.partition_point(|&v| v <= threshold);
397                (0, end)
398            }
399            CompOp::Eq => {
400                let start = col.partition_point(|&v| v < threshold);
401                let end = col.partition_point(|&v| v <= threshold);
402                (start, end)
403            }
404        }
405    }
406}
407
408/// Metadata for an entire DataFrame.
409#[derive(Debug, Clone)]
410pub struct DataFrameStats {
411    /// Per-column stats, in column order.
412    pub column_stats: Vec<(String, ColumnStats)>,
413}
414
415impl DataFrameStats {
416    /// Compute stats for all columns in a DataFrame.
417    pub fn compute(df: &crate::DataFrame) -> Self {
418        let column_stats = df
419            .columns
420            .iter()
421            .map(|(name, col)| (name.clone(), ColumnStats::compute(col)))
422            .collect();
423        DataFrameStats { column_stats }
424    }
425
426    /// Get stats for a named column.
427    pub fn get(&self, name: &str) -> Option<&ColumnStats> {
428        self.column_stats
429            .iter()
430            .find(|(n, _)| n == name)
431            .map(|(_, s)| s)
432    }
433}
434
435// ── Tests ─────────────────────────────────────────────────────────────────
436
437#[cfg(test)]
438mod tests {
439    use super::*;
440    use crate::{Column, DataFrame};
441
442    // 1. Float column min/max computation
443    #[test]
444    fn test_float_min_max() {
445        let col = Column::Float(vec![3.0, 1.0, 4.0, 1.5, 9.2]);
446        let stats = ColumnStats::compute(&col);
447        assert_eq!(stats.min_f64, Some(1.0));
448        assert_eq!(stats.max_f64, Some(9.2));
449        assert_eq!(stats.nrows, 5);
450        assert_eq!(stats.n_null, 0);
451    }
452
453    // 2. Int column min/max computation
454    #[test]
455    fn test_int_min_max() {
456        let col = Column::Int(vec![10, -3, 42, 0, 7]);
457        let stats = ColumnStats::compute(&col);
458        assert_eq!(stats.min_i64, Some(-3));
459        assert_eq!(stats.max_i64, Some(42));
460        assert_eq!(stats.nrows, 5);
461    }
462
463    // 3. Sorted ascending detection
464    #[test]
465    fn test_sorted_ascending() {
466        let col = Column::Float(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
467        let stats = ColumnStats::compute(&col);
468        assert!(stats.sorted_asc);
469        assert!(!stats.sorted_desc);
470        assert!(stats.is_sorted());
471    }
472
473    // 4. Sorted descending detection
474    #[test]
475    fn test_sorted_descending() {
476        let col = Column::Int(vec![50, 40, 30, 20, 10]);
477        let stats = ColumnStats::compute(&col);
478        assert!(!stats.sorted_asc);
479        assert!(stats.sorted_desc);
480        assert!(stats.is_sorted());
481    }
482
483    // 5. Unsorted detection
484    #[test]
485    fn test_unsorted() {
486        let col = Column::Float(vec![3.0, 1.0, 4.0, 1.5, 9.2]);
487        let stats = ColumnStats::compute(&col);
488        assert!(!stats.sorted_asc);
489        assert!(!stats.sorted_desc);
490        assert!(!stats.is_sorted());
491    }
492
493    // 6a. can_skip_gt
494    #[test]
495    fn test_can_skip_gt() {
496        let col = Column::Float(vec![1.0, 2.0, 3.0]);
497        let stats = ColumnStats::compute(&col);
498        // max is 3.0, threshold 5.0 => all values <= 5.0, can skip
499        assert!(stats.can_skip_gt(5.0));
500        // max is 3.0, threshold 3.0 => all values <= 3.0, can skip
501        assert!(stats.can_skip_gt(3.0));
502        // max is 3.0, threshold 2.0 => some values > 2.0, cannot skip
503        assert!(!stats.can_skip_gt(2.0));
504    }
505
506    // 6b. can_skip_lt
507    #[test]
508    fn test_can_skip_lt() {
509        let col = Column::Float(vec![5.0, 6.0, 7.0]);
510        let stats = ColumnStats::compute(&col);
511        // min is 5.0, threshold 3.0 => all values >= 3.0, can skip
512        assert!(stats.can_skip_lt(3.0));
513        // min is 5.0, threshold 5.0 => all values >= 5.0, can skip
514        assert!(stats.can_skip_lt(5.0));
515        // min is 5.0, threshold 6.0 => some values < 6.0, cannot skip
516        assert!(!stats.can_skip_lt(6.0));
517    }
518
519    // 6c. can_skip_ge
520    #[test]
521    fn test_can_skip_ge() {
522        let col = Column::Float(vec![1.0, 2.0, 3.0]);
523        let stats = ColumnStats::compute(&col);
524        // max is 3.0, threshold 4.0 => all values < 4.0, can skip col >= 4.0
525        assert!(stats.can_skip_ge(4.0));
526        // max is 3.0, threshold 3.0 => 3.0 >= 3.0 is true, cannot skip
527        assert!(!stats.can_skip_ge(3.0));
528    }
529
530    // 6d. can_skip_le
531    #[test]
532    fn test_can_skip_le() {
533        let col = Column::Float(vec![5.0, 6.0, 7.0]);
534        let stats = ColumnStats::compute(&col);
535        // min is 5.0, threshold 4.0 => all values > 4.0, can skip col <= 4.0
536        assert!(stats.can_skip_le(4.0));
537        // min is 5.0, threshold 5.0 => 5.0 <= 5.0 is true, cannot skip
538        assert!(!stats.can_skip_le(5.0));
539    }
540
541    // 6e. can_skip_eq
542    #[test]
543    fn test_can_skip_eq() {
544        let col = Column::Float(vec![10.0, 20.0, 30.0]);
545        let stats = ColumnStats::compute(&col);
546        // value outside range
547        assert!(stats.can_skip_eq_f64(5.0));
548        assert!(stats.can_skip_eq_f64(35.0));
549        // value inside range
550        assert!(!stats.can_skip_eq_f64(15.0));
551        assert!(!stats.can_skip_eq_f64(10.0));
552        assert!(!stats.can_skip_eq_f64(30.0));
553    }
554
555    // 7. Binary search range on sorted float column
556    #[test]
557    fn test_binary_search_range_f64() {
558        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
559
560        // col > 3.0 => rows [3, 4] (values 4.0, 5.0)
561        assert_eq!(
562            ColumnStats::binary_search_range_f64(&data, CompOp::Gt, 3.0),
563            (3, 5)
564        );
565        // col >= 3.0 => rows [2, 3, 4] (values 3.0, 4.0, 5.0)
566        assert_eq!(
567            ColumnStats::binary_search_range_f64(&data, CompOp::Ge, 3.0),
568            (2, 5)
569        );
570        // col < 3.0 => rows [0, 1] (values 1.0, 2.0)
571        assert_eq!(
572            ColumnStats::binary_search_range_f64(&data, CompOp::Lt, 3.0),
573            (0, 2)
574        );
575        // col <= 3.0 => rows [0, 1, 2] (values 1.0, 2.0, 3.0)
576        assert_eq!(
577            ColumnStats::binary_search_range_f64(&data, CompOp::Le, 3.0),
578            (0, 3)
579        );
580        // col == 3.0 => row [2] (value 3.0)
581        assert_eq!(
582            ColumnStats::binary_search_range_f64(&data, CompOp::Eq, 3.0),
583            (2, 3)
584        );
585        // col == 2.5 => no match
586        assert_eq!(
587            ColumnStats::binary_search_range_f64(&data, CompOp::Eq, 2.5),
588            (2, 2)
589        );
590    }
591
592    // 7b. Binary search range on sorted int column
593    #[test]
594    fn test_binary_search_range_i64() {
595        let data = vec![10, 20, 30, 40, 50];
596
597        assert_eq!(
598            ColumnStats::binary_search_range_i64(&data, CompOp::Gt, 30),
599            (3, 5)
600        );
601        assert_eq!(
602            ColumnStats::binary_search_range_i64(&data, CompOp::Eq, 30),
603            (2, 3)
604        );
605        assert_eq!(
606            ColumnStats::binary_search_range_i64(&data, CompOp::Lt, 10),
607            (0, 0)
608        );
609    }
610
611    // 8. NaN handling: NaN does not affect min/max
612    #[test]
613    fn test_nan_handling() {
614        let col = Column::Float(vec![f64::NAN, 2.0, f64::NAN, 5.0, 1.0]);
615        let stats = ColumnStats::compute(&col);
616        assert_eq!(stats.min_f64, Some(1.0));
617        assert_eq!(stats.max_f64, Some(5.0));
618        assert_eq!(stats.n_null, 2);
619        assert_eq!(stats.nrows, 5);
620    }
621
622    // 8b. All-NaN column
623    #[test]
624    fn test_all_nan_column() {
625        let col = Column::Float(vec![f64::NAN, f64::NAN]);
626        let stats = ColumnStats::compute(&col);
627        assert_eq!(stats.min_f64, None);
628        assert_eq!(stats.max_f64, None);
629        assert_eq!(stats.n_null, 2);
630        // can_skip should return false (no stats available)
631        assert!(!stats.can_skip_gt(0.0));
632    }
633
634    // 9. Empty column stats
635    #[test]
636    fn test_empty_column() {
637        let col = Column::Float(vec![]);
638        let stats = ColumnStats::compute(&col);
639        assert_eq!(stats.min_f64, None);
640        assert_eq!(stats.max_f64, None);
641        assert_eq!(stats.nrows, 0);
642        assert!(stats.sorted_asc);
643        assert!(stats.sorted_desc);
644        assert!(stats.is_sorted());
645        assert_eq!(stats.n_distinct, Some(0));
646    }
647
648    // 9b. Empty int column
649    #[test]
650    fn test_empty_int_column() {
651        let col = Column::Int(vec![]);
652        let stats = ColumnStats::compute(&col);
653        assert_eq!(stats.min_i64, None);
654        assert_eq!(stats.max_i64, None);
655        assert_eq!(stats.nrows, 0);
656        assert!(stats.is_sorted());
657    }
658
659    // 10. String column n_distinct
660    #[test]
661    fn test_str_n_distinct() {
662        let col = Column::Str(vec![
663            "apple".into(),
664            "banana".into(),
665            "apple".into(),
666            "cherry".into(),
667        ]);
668        let stats = ColumnStats::compute(&col);
669        assert_eq!(stats.n_distinct, Some(3));
670        assert_eq!(stats.nrows, 4);
671    }
672
673    // 10b. Sorted string column
674    #[test]
675    fn test_sorted_str() {
676        let col = Column::Str(vec!["a".into(), "b".into(), "c".into()]);
677        let stats = ColumnStats::compute(&col);
678        assert!(stats.sorted_asc);
679        assert!(!stats.sorted_desc);
680    }
681
682    // 11. DataFrameStats compute + get
683    #[test]
684    fn test_dataframe_stats() {
685        let mut df = DataFrame::new();
686        df.columns
687            .push(("age".into(), Column::Int(vec![25, 30, 45])));
688        df.columns
689            .push(("score".into(), Column::Float(vec![88.5, 92.0, 76.3])));
690
691        let stats = DataFrameStats::compute(&df);
692        assert_eq!(stats.column_stats.len(), 2);
693
694        let age_stats = stats.get("age").unwrap();
695        assert_eq!(age_stats.min_i64, Some(25));
696        assert_eq!(age_stats.max_i64, Some(45));
697
698        let score_stats = stats.get("score").unwrap();
699        assert_eq!(score_stats.min_f64, Some(76.3));
700        assert_eq!(score_stats.max_f64, Some(92.0));
701
702        // Non-existent column
703        assert!(stats.get("nonexistent").is_none());
704    }
705
706    // 12. Zone map correctly identifies "can skip entire column"
707    #[test]
708    fn test_zone_map_can_skip() {
709        // Column with values [100, 200, 300]
710        let col = Column::Float(vec![100.0, 200.0, 300.0]);
711        let stats = ColumnStats::compute(&col);
712
713        // Filter: col > 500 => max=300 <= 500, can skip
714        assert!(stats.can_skip_gt(500.0));
715        // Filter: col < 50 => min=100 >= 50, can skip
716        assert!(stats.can_skip_lt(50.0));
717        // Filter: col == 400 => 400 > max, can skip
718        assert!(stats.can_skip_eq_f64(400.0));
719        // Filter: col == 50 => 50 < min, can skip
720        assert!(stats.can_skip_eq_f64(50.0));
721    }
722
723    // 13. Zone map correctly identifies "cannot skip"
724    #[test]
725    fn test_zone_map_cannot_skip() {
726        let col = Column::Float(vec![100.0, 200.0, 300.0]);
727        let stats = ColumnStats::compute(&col);
728
729        // Filter: col > 150 => max=300 > 150, cannot skip
730        assert!(!stats.can_skip_gt(150.0));
731        // Filter: col < 250 => min=100 < 250, cannot skip
732        assert!(!stats.can_skip_lt(250.0));
733        // Filter: col == 200 => in [100, 300], cannot skip
734        assert!(!stats.can_skip_eq_f64(200.0));
735    }
736
737    // 14. Determinism test: 3 runs produce identical stats
738    #[test]
739    fn test_determinism() {
740        let data = vec![3.14, 2.71, 1.41, 1.73, 0.577];
741        for _ in 0..3 {
742            let col = Column::Float(data.clone());
743            let stats = ColumnStats::compute(&col);
744            assert_eq!(stats.min_f64, Some(0.577));
745            assert_eq!(stats.max_f64, Some(3.14));
746            assert!(!stats.sorted_asc);
747            assert!(!stats.sorted_desc);
748            assert_eq!(stats.n_null, 0);
749            assert_eq!(stats.nrows, 5);
750        }
751    }
752
753    // 14b. Determinism for int + str columns
754    #[test]
755    fn test_determinism_int_str() {
756        for _ in 0..3 {
757            let icol = Column::Int(vec![9, 1, 5, 3, 7]);
758            let is = ColumnStats::compute(&icol);
759            assert_eq!(is.min_i64, Some(1));
760            assert_eq!(is.max_i64, Some(9));
761
762            let scol = Column::Str(vec!["x".into(), "a".into(), "m".into()]);
763            let ss = ColumnStats::compute(&scol);
764            assert_eq!(ss.n_distinct, Some(3));
765        }
766    }
767
768    // 15. Bool column stats
769    #[test]
770    fn test_bool_column() {
771        let col = Column::Bool(vec![true, false, true, true]);
772        let stats = ColumnStats::compute(&col);
773        assert_eq!(stats.n_distinct, Some(2));
774        assert_eq!(stats.nrows, 4);
775        assert!(!stats.sorted_asc);
776        assert!(!stats.sorted_desc);
777    }
778
779    // 16. Categorical column
780    #[test]
781    fn test_categorical_column() {
782        let col = Column::Categorical {
783            levels: vec!["low".into(), "med".into(), "high".into()],
784            codes: vec![0, 1, 2, 1, 0],
785        };
786        let stats = ColumnStats::compute(&col);
787        assert_eq!(stats.min_i64, Some(0));
788        assert_eq!(stats.max_i64, Some(2));
789        assert_eq!(stats.n_distinct, Some(3));
790        assert!(!stats.is_sorted());
791    }
792
793    // 17. Single-element column
794    #[test]
795    fn test_single_element() {
796        let col = Column::Float(vec![42.0]);
797        let stats = ColumnStats::compute(&col);
798        assert_eq!(stats.min_f64, Some(42.0));
799        assert_eq!(stats.max_f64, Some(42.0));
800        assert!(stats.sorted_asc);
801        assert!(stats.sorted_desc);
802        assert!(stats.is_sorted());
803    }
804
805    // 18. Int zone-map skip predicates
806    #[test]
807    fn test_int_zone_map_skip() {
808        let col = Column::Int(vec![10, 20, 30]);
809        let stats = ColumnStats::compute(&col);
810        assert!(stats.can_skip_gt_i64(30));
811        assert!(stats.can_skip_gt_i64(50));
812        assert!(!stats.can_skip_gt_i64(15));
813        assert!(stats.can_skip_lt_i64(5));
814        assert!(stats.can_skip_lt_i64(10));
815        assert!(!stats.can_skip_lt_i64(20));
816        assert!(stats.can_skip_eq_i64(5));
817        assert!(stats.can_skip_eq_i64(35));
818        assert!(!stats.can_skip_eq_i64(20));
819    }
820
821    // 19. Constant column (all same value)
822    #[test]
823    fn test_constant_column() {
824        let col = Column::Float(vec![7.0, 7.0, 7.0]);
825        let stats = ColumnStats::compute(&col);
826        assert_eq!(stats.min_f64, Some(7.0));
827        assert_eq!(stats.max_f64, Some(7.0));
828        // A constant column is both ascending and descending
829        assert!(stats.sorted_asc);
830        assert!(stats.sorted_desc);
831        assert!(stats.can_skip_eq_f64(8.0));
832        assert!(!stats.can_skip_eq_f64(7.0));
833    }
834}