1use crate::query::plan::{
18 AggregateOp, BinaryOp, DistinctOp, ExpandOp, FilterOp, JoinOp, JoinType, LeftJoinOp, LimitOp,
19 LogicalExpression, LogicalOperator, MultiWayJoinOp, NodeScanOp, ProjectOp, SkipOp, SortOp,
20 TripleComponent, TripleScanOp, UnaryOp, VectorJoinOp, VectorScanOp,
21};
22use std::collections::HashMap;
23
24#[derive(Debug, Clone)]
30pub struct HistogramBucket {
31 pub lower_bound: f64,
33 pub upper_bound: f64,
35 pub frequency: u64,
37 pub distinct_count: u64,
39}
40
41impl HistogramBucket {
42 #[must_use]
44 pub fn new(lower_bound: f64, upper_bound: f64, frequency: u64, distinct_count: u64) -> Self {
45 Self {
46 lower_bound,
47 upper_bound,
48 frequency,
49 distinct_count,
50 }
51 }
52
53 #[must_use]
55 pub fn width(&self) -> f64 {
56 self.upper_bound - self.lower_bound
57 }
58
59 #[must_use]
61 pub fn contains(&self, value: f64) -> bool {
62 value >= self.lower_bound && value < self.upper_bound
63 }
64
65 #[must_use]
67 pub fn overlap_fraction(&self, lower: Option<f64>, upper: Option<f64>) -> f64 {
68 let effective_lower = lower.unwrap_or(self.lower_bound).max(self.lower_bound);
69 let effective_upper = upper.unwrap_or(self.upper_bound).min(self.upper_bound);
70
71 let bucket_width = self.width();
72 if bucket_width <= 0.0 {
73 return if effective_lower <= self.lower_bound && effective_upper >= self.upper_bound {
74 1.0
75 } else {
76 0.0
77 };
78 }
79
80 let overlap = (effective_upper - effective_lower).max(0.0);
81 (overlap / bucket_width).min(1.0)
82 }
83}
84
85#[derive(Debug, Clone)]
105pub struct EquiDepthHistogram {
106 buckets: Vec<HistogramBucket>,
108 total_rows: u64,
110}
111
112impl EquiDepthHistogram {
113 #[must_use]
115 pub fn new(buckets: Vec<HistogramBucket>) -> Self {
116 let total_rows = buckets.iter().map(|b| b.frequency).sum();
117 Self {
118 buckets,
119 total_rows,
120 }
121 }
122
123 #[must_use]
132 pub fn build(values: &[f64], num_buckets: usize) -> Self {
133 if values.is_empty() || num_buckets == 0 {
134 return Self {
135 buckets: Vec::new(),
136 total_rows: 0,
137 };
138 }
139
140 let num_buckets = num_buckets.min(values.len());
141 let rows_per_bucket = (values.len() + num_buckets - 1) / num_buckets;
142 let mut buckets = Vec::with_capacity(num_buckets);
143
144 let mut start_idx = 0;
145 while start_idx < values.len() {
146 let end_idx = (start_idx + rows_per_bucket).min(values.len());
147 let lower_bound = values[start_idx];
148 let upper_bound = if end_idx < values.len() {
149 values[end_idx]
150 } else {
151 values[end_idx - 1] + 1.0
153 };
154
155 let bucket_values = &values[start_idx..end_idx];
157 let distinct_count = count_distinct(bucket_values);
158
159 buckets.push(HistogramBucket::new(
160 lower_bound,
161 upper_bound,
162 (end_idx - start_idx) as u64,
163 distinct_count,
164 ));
165
166 start_idx = end_idx;
167 }
168
169 Self::new(buckets)
170 }
171
172 #[must_use]
174 pub fn num_buckets(&self) -> usize {
175 self.buckets.len()
176 }
177
178 #[must_use]
180 pub fn total_rows(&self) -> u64 {
181 self.total_rows
182 }
183
184 #[must_use]
186 pub fn buckets(&self) -> &[HistogramBucket] {
187 &self.buckets
188 }
189
190 #[must_use]
199 pub fn range_selectivity(&self, lower: Option<f64>, upper: Option<f64>) -> f64 {
200 if self.buckets.is_empty() || self.total_rows == 0 {
201 return 0.33; }
203
204 let mut matching_rows = 0.0;
205
206 for bucket in &self.buckets {
207 let bucket_lower = bucket.lower_bound;
209 let bucket_upper = bucket.upper_bound;
210
211 if let Some(l) = lower
213 && bucket_upper <= l
214 {
215 continue;
216 }
217 if let Some(u) = upper
218 && bucket_lower >= u
219 {
220 continue;
221 }
222
223 let overlap = bucket.overlap_fraction(lower, upper);
225 matching_rows += overlap * bucket.frequency as f64;
226 }
227
228 (matching_rows / self.total_rows as f64).clamp(0.0, 1.0)
229 }
230
231 #[must_use]
235 pub fn equality_selectivity(&self, value: f64) -> f64 {
236 if self.buckets.is_empty() || self.total_rows == 0 {
237 return 0.01; }
239
240 for bucket in &self.buckets {
242 if bucket.contains(value) {
243 if bucket.distinct_count > 0 {
245 return (bucket.frequency as f64
246 / bucket.distinct_count as f64
247 / self.total_rows as f64)
248 .min(1.0);
249 }
250 }
251 }
252
253 0.001
255 }
256
257 #[must_use]
259 pub fn min_value(&self) -> Option<f64> {
260 self.buckets.first().map(|b| b.lower_bound)
261 }
262
263 #[must_use]
265 pub fn max_value(&self) -> Option<f64> {
266 self.buckets.last().map(|b| b.upper_bound)
267 }
268}
269
270fn count_and_conjuncts(expr: &LogicalExpression) -> usize {
276 match expr {
277 LogicalExpression::Binary {
278 op: BinaryOp::And,
279 left,
280 right,
281 } => count_and_conjuncts(left) + count_and_conjuncts(right),
282 _ => 1,
283 }
284}
285
286fn count_distinct(sorted_values: &[f64]) -> u64 {
287 if sorted_values.is_empty() {
288 return 0;
289 }
290
291 let mut count = 1u64;
292 let mut prev = sorted_values[0];
293
294 for &val in &sorted_values[1..] {
295 if (val - prev).abs() > f64::EPSILON {
296 count += 1;
297 prev = val;
298 }
299 }
300
301 count
302}
303
304#[derive(Debug, Clone)]
306pub struct TableStats {
307 pub row_count: u64,
309 pub columns: HashMap<String, ColumnStats>,
311}
312
313impl TableStats {
314 #[must_use]
316 pub fn new(row_count: u64) -> Self {
317 Self {
318 row_count,
319 columns: HashMap::new(),
320 }
321 }
322
323 pub fn with_column(mut self, name: &str, stats: ColumnStats) -> Self {
325 self.columns.insert(name.to_string(), stats);
326 self
327 }
328}
329
330#[derive(Debug, Clone)]
332pub struct ColumnStats {
333 pub distinct_count: u64,
335 pub null_count: u64,
337 pub min_value: Option<f64>,
339 pub max_value: Option<f64>,
341 pub histogram: Option<EquiDepthHistogram>,
343}
344
345impl ColumnStats {
346 #[must_use]
348 pub fn new(distinct_count: u64) -> Self {
349 Self {
350 distinct_count,
351 null_count: 0,
352 min_value: None,
353 max_value: None,
354 histogram: None,
355 }
356 }
357
358 #[must_use]
360 pub fn with_nulls(mut self, null_count: u64) -> Self {
361 self.null_count = null_count;
362 self
363 }
364
365 #[must_use]
367 pub fn with_range(mut self, min: f64, max: f64) -> Self {
368 self.min_value = Some(min);
369 self.max_value = Some(max);
370 self
371 }
372
373 #[must_use]
375 pub fn with_histogram(mut self, histogram: EquiDepthHistogram) -> Self {
376 self.histogram = Some(histogram);
377 self
378 }
379
380 #[must_use]
388 pub fn from_values(mut values: Vec<f64>, num_buckets: usize) -> Self {
389 if values.is_empty() {
390 return Self::new(0);
391 }
392
393 values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
395
396 let min = values.first().copied();
397 let max = values.last().copied();
398 let distinct_count = count_distinct(&values);
399 let histogram = EquiDepthHistogram::build(&values, num_buckets);
400
401 Self {
402 distinct_count,
403 null_count: 0,
404 min_value: min,
405 max_value: max,
406 histogram: Some(histogram),
407 }
408 }
409}
410
411#[derive(Debug, Clone)]
417pub struct SelectivityConfig {
418 pub default: f64,
420 pub equality: f64,
422 pub inequality: f64,
424 pub range: f64,
426 pub string_ops: f64,
428 pub membership: f64,
430 pub is_null: f64,
432 pub is_not_null: f64,
434 pub distinct_fraction: f64,
436}
437
438impl SelectivityConfig {
439 #[must_use]
441 pub fn new() -> Self {
442 Self {
443 default: 0.1,
444 equality: 0.01,
445 inequality: 0.99,
446 range: 0.33,
447 string_ops: 0.1,
448 membership: 0.1,
449 is_null: 0.05,
450 is_not_null: 0.95,
451 distinct_fraction: 0.5,
452 }
453 }
454}
455
456impl Default for SelectivityConfig {
457 fn default() -> Self {
458 Self::new()
459 }
460}
461
462#[derive(Debug, Clone)]
464pub struct EstimationEntry {
465 pub operator: String,
467 pub estimated: f64,
469 pub actual: f64,
471}
472
473impl EstimationEntry {
474 #[must_use]
480 pub fn error_ratio(&self) -> f64 {
481 if self.estimated.abs() < f64::EPSILON {
482 if self.actual.abs() < f64::EPSILON {
483 1.0
484 } else {
485 f64::INFINITY
486 }
487 } else {
488 self.actual / self.estimated
489 }
490 }
491}
492
493#[derive(Debug, Clone, Default)]
500pub struct EstimationLog {
501 entries: Vec<EstimationEntry>,
503 replan_threshold: f64,
507}
508
509impl EstimationLog {
510 #[must_use]
512 pub fn new(replan_threshold: f64) -> Self {
513 Self {
514 entries: Vec::new(),
515 replan_threshold,
516 }
517 }
518
519 pub fn record(&mut self, operator: impl Into<String>, estimated: f64, actual: f64) {
521 self.entries.push(EstimationEntry {
522 operator: operator.into(),
523 estimated,
524 actual,
525 });
526 }
527
528 #[must_use]
530 pub fn entries(&self) -> &[EstimationEntry] {
531 &self.entries
532 }
533
534 #[must_use]
537 pub fn should_replan(&self) -> bool {
538 self.entries.iter().any(|e| {
539 let ratio = e.error_ratio();
540 ratio > self.replan_threshold || ratio < 1.0 / self.replan_threshold
541 })
542 }
543
544 #[must_use]
546 pub fn max_error_ratio(&self) -> f64 {
547 self.entries
548 .iter()
549 .map(|e| {
550 let r = e.error_ratio();
551 if r < 1.0 { 1.0 / r } else { r }
553 })
554 .fold(1.0_f64, f64::max)
555 }
556
557 pub fn clear(&mut self) {
559 self.entries.clear();
560 }
561}
562
563pub struct CardinalityEstimator {
565 table_stats: HashMap<String, TableStats>,
567 default_row_count: u64,
569 default_selectivity: f64,
571 avg_fanout: f64,
573 selectivity_config: SelectivityConfig,
575 rdf_statistics: Option<grafeo_core::statistics::RdfStatistics>,
577}
578
579impl CardinalityEstimator {
580 #[must_use]
582 pub fn new() -> Self {
583 let config = SelectivityConfig::new();
584 Self {
585 table_stats: HashMap::new(),
586 default_row_count: 1000,
587 default_selectivity: config.default,
588 avg_fanout: 10.0,
589 selectivity_config: config,
590 rdf_statistics: None,
591 }
592 }
593
594 #[must_use]
596 pub fn with_selectivity_config(config: SelectivityConfig) -> Self {
597 Self {
598 table_stats: HashMap::new(),
599 default_row_count: 1000,
600 default_selectivity: config.default,
601 avg_fanout: 10.0,
602 selectivity_config: config,
603 rdf_statistics: None,
604 }
605 }
606
607 #[must_use]
609 pub fn selectivity_config(&self) -> &SelectivityConfig {
610 &self.selectivity_config
611 }
612
613 #[must_use]
615 pub fn create_estimation_log() -> EstimationLog {
616 EstimationLog::new(10.0)
617 }
618
619 #[must_use]
625 pub fn from_statistics(stats: &grafeo_core::statistics::Statistics) -> Self {
626 let mut estimator = Self::new();
627
628 if stats.total_nodes > 0 {
630 estimator.default_row_count = stats.total_nodes;
631 }
632
633 for (label, label_stats) in &stats.labels {
635 let mut table_stats = TableStats::new(label_stats.node_count);
636
637 for (prop, col_stats) in &label_stats.properties {
639 let optimizer_col =
640 ColumnStats::new(col_stats.distinct_count).with_nulls(col_stats.null_count);
641 table_stats = table_stats.with_column(prop, optimizer_col);
642 }
643
644 estimator.add_table_stats(label, table_stats);
645 }
646
647 if !stats.edge_types.is_empty() {
649 let total_out_degree: f64 = stats.edge_types.values().map(|e| e.avg_out_degree).sum();
650 estimator.avg_fanout = total_out_degree / stats.edge_types.len() as f64;
651 } else if stats.total_nodes > 0 {
652 estimator.avg_fanout = stats.total_edges as f64 / stats.total_nodes as f64;
653 }
654
655 if estimator.avg_fanout < 1.0 {
657 estimator.avg_fanout = 1.0;
658 }
659
660 estimator
661 }
662
663 #[must_use]
668 pub fn from_rdf_statistics(rdf_stats: grafeo_core::statistics::RdfStatistics) -> Self {
669 let mut estimator = Self::new();
670 if rdf_stats.total_triples > 0 {
671 estimator.default_row_count = rdf_stats.total_triples;
672 }
673 estimator.rdf_statistics = Some(rdf_stats);
674 estimator
675 }
676
677 pub fn add_table_stats(&mut self, name: &str, stats: TableStats) {
679 self.table_stats.insert(name.to_string(), stats);
680 }
681
682 pub fn set_avg_fanout(&mut self, fanout: f64) {
684 self.avg_fanout = fanout;
685 }
686
687 #[must_use]
689 pub fn estimate(&self, op: &LogicalOperator) -> f64 {
690 match op {
691 LogicalOperator::NodeScan(scan) => self.estimate_node_scan(scan),
692 LogicalOperator::Filter(filter) => self.estimate_filter(filter),
693 LogicalOperator::Project(project) => self.estimate_project(project),
694 LogicalOperator::Expand(expand) => self.estimate_expand(expand),
695 LogicalOperator::Join(join) => self.estimate_join(join),
696 LogicalOperator::Aggregate(agg) => self.estimate_aggregate(agg),
697 LogicalOperator::Sort(sort) => self.estimate_sort(sort),
698 LogicalOperator::Distinct(distinct) => self.estimate_distinct(distinct),
699 LogicalOperator::Limit(limit) => self.estimate_limit(limit),
700 LogicalOperator::Skip(skip) => self.estimate_skip(skip),
701 LogicalOperator::Return(ret) => self.estimate(&ret.input),
702 LogicalOperator::Empty => 0.0,
703 LogicalOperator::VectorScan(scan) => self.estimate_vector_scan(scan),
704 LogicalOperator::VectorJoin(join) => self.estimate_vector_join(join),
705 LogicalOperator::MultiWayJoin(mwj) => self.estimate_multi_way_join(mwj),
706 LogicalOperator::LeftJoin(lj) => self.estimate_left_join(lj),
707 LogicalOperator::TripleScan(scan) => self.estimate_triple_scan(scan),
708 _ => self.default_row_count as f64,
709 }
710 }
711
712 fn estimate_node_scan(&self, scan: &NodeScanOp) -> f64 {
714 if let Some(label) = &scan.label
715 && let Some(stats) = self.table_stats.get(label)
716 {
717 return stats.row_count as f64;
718 }
719 self.default_row_count as f64
721 }
722
723 fn estimate_triple_scan(&self, scan: &TripleScanOp) -> f64 {
729 let base = if let Some(ref input) = scan.input {
732 self.estimate(input)
733 } else {
734 1.0
735 };
736
737 let Some(rdf_stats) = &self.rdf_statistics else {
738 return if scan.input.is_some() {
739 base * self.default_row_count as f64
740 } else {
741 self.default_row_count as f64
742 };
743 };
744
745 let subject_bound = matches!(
746 scan.subject,
747 TripleComponent::Iri(_)
748 | TripleComponent::Literal(_)
749 | TripleComponent::LangLiteral { .. }
750 );
751 let object_bound = matches!(
752 scan.object,
753 TripleComponent::Iri(_)
754 | TripleComponent::Literal(_)
755 | TripleComponent::LangLiteral { .. }
756 );
757 let predicate_iri = match &scan.predicate {
758 TripleComponent::Iri(iri) => Some(iri.as_str()),
759 _ => None,
760 };
761
762 let pattern_card = rdf_stats.estimate_triple_pattern_cardinality(
763 subject_bound,
764 predicate_iri,
765 object_bound,
766 );
767
768 if scan.input.is_some() {
769 let selectivity = if rdf_stats.total_triples > 0 {
771 pattern_card / rdf_stats.total_triples as f64
772 } else {
773 1.0
774 };
775 (base * pattern_card * selectivity).max(1.0)
776 } else {
777 pattern_card.max(1.0)
778 }
779 }
780
781 fn estimate_filter(&self, filter: &FilterOp) -> f64 {
783 let input_cardinality = self.estimate(&filter.input);
784 let selectivity = self.estimate_selectivity(&filter.predicate);
785 (input_cardinality * selectivity).max(1.0)
786 }
787
788 fn estimate_project(&self, project: &ProjectOp) -> f64 {
790 self.estimate(&project.input)
791 }
792
793 fn estimate_expand(&self, expand: &ExpandOp) -> f64 {
795 let input_cardinality = self.estimate(&expand.input);
796
797 let fanout = if !expand.edge_types.is_empty() {
799 self.avg_fanout * 0.5
801 } else {
802 self.avg_fanout
803 };
804
805 let path_multiplier = if expand.max_hops.unwrap_or(1) > 1 {
807 let min = expand.min_hops as f64;
808 let max = expand.max_hops.unwrap_or(expand.min_hops + 3) as f64;
809 (fanout.powf(max + 1.0) - fanout.powf(min)) / (fanout - 1.0)
811 } else {
812 fanout
813 };
814
815 (input_cardinality * path_multiplier).max(1.0)
816 }
817
818 fn estimate_join(&self, join: &JoinOp) -> f64 {
820 let left_card = self.estimate(&join.left);
821 let right_card = self.estimate(&join.right);
822
823 match join.join_type {
824 JoinType::Cross => left_card * right_card,
825 JoinType::Inner => {
826 let selectivity = if join.conditions.is_empty() {
828 1.0 } else {
830 0.1_f64.powi(join.conditions.len() as i32)
832 };
833 (left_card * right_card * selectivity).max(1.0)
834 }
835 JoinType::Left => {
836 let inner_card = self.estimate_join(&JoinOp {
838 left: join.left.clone(),
839 right: join.right.clone(),
840 join_type: JoinType::Inner,
841 conditions: join.conditions.clone(),
842 });
843 inner_card.max(left_card)
844 }
845 JoinType::Right => {
846 let inner_card = self.estimate_join(&JoinOp {
848 left: join.left.clone(),
849 right: join.right.clone(),
850 join_type: JoinType::Inner,
851 conditions: join.conditions.clone(),
852 });
853 inner_card.max(right_card)
854 }
855 JoinType::Full => {
856 let inner_card = self.estimate_join(&JoinOp {
858 left: join.left.clone(),
859 right: join.right.clone(),
860 join_type: JoinType::Inner,
861 conditions: join.conditions.clone(),
862 });
863 inner_card.max(left_card.max(right_card))
864 }
865 JoinType::Semi => {
866 (left_card * self.default_selectivity).max(1.0)
868 }
869 JoinType::Anti => {
870 (left_card * (1.0 - self.default_selectivity)).max(1.0)
872 }
873 }
874 }
875
876 fn estimate_left_join(&self, lj: &LeftJoinOp) -> f64 {
885 let left_card = self.estimate(&lj.left);
886 let right_card = self.estimate(&lj.right);
887
888 let condition_selectivity = if let Some(cond) = &lj.condition {
891 let n = count_and_conjuncts(cond);
892 self.default_selectivity.powi(n as i32)
893 } else {
894 self.default_selectivity
895 };
896
897 let inner_estimate = left_card * right_card * condition_selectivity;
899 inner_estimate.max(left_card).max(1.0)
900 }
901
902 fn estimate_aggregate(&self, agg: &AggregateOp) -> f64 {
904 let input_cardinality = self.estimate(&agg.input);
905
906 if agg.group_by.is_empty() {
907 1.0
909 } else {
910 let group_reduction = 10.0_f64.powi(agg.group_by.len() as i32);
913 (input_cardinality / group_reduction).max(1.0)
914 }
915 }
916
917 fn estimate_sort(&self, sort: &SortOp) -> f64 {
919 self.estimate(&sort.input)
920 }
921
922 fn estimate_distinct(&self, distinct: &DistinctOp) -> f64 {
924 let input_cardinality = self.estimate(&distinct.input);
925 (input_cardinality * self.selectivity_config.distinct_fraction).max(1.0)
926 }
927
928 fn estimate_limit(&self, limit: &LimitOp) -> f64 {
930 let input_cardinality = self.estimate(&limit.input);
931 limit.count.estimate().min(input_cardinality)
932 }
933
934 fn estimate_skip(&self, skip: &SkipOp) -> f64 {
936 let input_cardinality = self.estimate(&skip.input);
937 (input_cardinality - skip.count.estimate()).max(0.0)
938 }
939
940 fn estimate_vector_scan(&self, scan: &VectorScanOp) -> f64 {
945 let base_k = scan.k as f64;
946
947 let selectivity = if scan.min_similarity.is_some() || scan.max_distance.is_some() {
949 0.7
951 } else {
952 1.0
953 };
954
955 (base_k * selectivity).max(1.0)
956 }
957
958 fn estimate_vector_join(&self, join: &VectorJoinOp) -> f64 {
962 let input_cardinality = self.estimate(&join.input);
963 let k = join.k as f64;
964
965 let selectivity = if join.min_similarity.is_some() || join.max_distance.is_some() {
967 0.7
968 } else {
969 1.0
970 };
971
972 (input_cardinality * k * selectivity).max(1.0)
973 }
974
975 fn estimate_multi_way_join(&self, mwj: &MultiWayJoinOp) -> f64 {
980 if mwj.inputs.is_empty() {
981 return 0.0;
982 }
983 let cardinalities: Vec<f64> = mwj
984 .inputs
985 .iter()
986 .map(|input| self.estimate(input))
987 .collect();
988 let min_card = cardinalities.iter().copied().fold(f64::INFINITY, f64::min);
989 let n = cardinalities.len() as f64;
990 (min_card.powf(n / 2.0)).max(1.0)
992 }
993
994 fn estimate_selectivity(&self, expr: &LogicalExpression) -> f64 {
996 match expr {
997 LogicalExpression::Binary { left, op, right } => {
998 self.estimate_binary_selectivity(left, *op, right)
999 }
1000 LogicalExpression::Unary { op, operand } => {
1001 self.estimate_unary_selectivity(*op, operand)
1002 }
1003 LogicalExpression::Literal(value) => {
1004 if let grafeo_common::types::Value::Bool(b) = value {
1006 if *b { 1.0 } else { 0.0 }
1007 } else {
1008 self.default_selectivity
1009 }
1010 }
1011 _ => self.default_selectivity,
1012 }
1013 }
1014
1015 fn estimate_binary_selectivity(
1017 &self,
1018 left: &LogicalExpression,
1019 op: BinaryOp,
1020 right: &LogicalExpression,
1021 ) -> f64 {
1022 match op {
1023 BinaryOp::Eq => {
1025 if let Some(selectivity) = self.try_equality_selectivity(left, right) {
1026 return selectivity;
1027 }
1028 self.selectivity_config.equality
1029 }
1030 BinaryOp::Ne => self.selectivity_config.inequality,
1032 BinaryOp::Lt | BinaryOp::Le | BinaryOp::Gt | BinaryOp::Ge => {
1034 if let Some(selectivity) = self.try_range_selectivity(left, op, right) {
1035 return selectivity;
1036 }
1037 self.selectivity_config.range
1038 }
1039 BinaryOp::And => {
1041 let left_sel = self.estimate_selectivity(left);
1042 let right_sel = self.estimate_selectivity(right);
1043 left_sel * right_sel
1045 }
1046 BinaryOp::Or => {
1047 let left_sel = self.estimate_selectivity(left);
1048 let right_sel = self.estimate_selectivity(right);
1049 (left_sel + right_sel - left_sel * right_sel).min(1.0)
1052 }
1053 BinaryOp::StartsWith | BinaryOp::EndsWith | BinaryOp::Contains | BinaryOp::Like => {
1055 self.selectivity_config.string_ops
1056 }
1057 BinaryOp::In => self.selectivity_config.membership,
1059 _ => self.default_selectivity,
1061 }
1062 }
1063
1064 fn try_equality_selectivity(
1066 &self,
1067 left: &LogicalExpression,
1068 right: &LogicalExpression,
1069 ) -> Option<f64> {
1070 let (label, column, value) = self.extract_column_and_value(left, right)?;
1072
1073 let stats = self.get_column_stats(&label, &column)?;
1075
1076 if let Some(ref histogram) = stats.histogram {
1078 return Some(histogram.equality_selectivity(value));
1079 }
1080
1081 if stats.distinct_count > 0 {
1083 return Some(1.0 / stats.distinct_count as f64);
1084 }
1085
1086 None
1087 }
1088
1089 fn try_range_selectivity(
1091 &self,
1092 left: &LogicalExpression,
1093 op: BinaryOp,
1094 right: &LogicalExpression,
1095 ) -> Option<f64> {
1096 let (label, column, value) = self.extract_column_and_value(left, right)?;
1098
1099 let stats = self.get_column_stats(&label, &column)?;
1101
1102 let (lower, upper) = match op {
1104 BinaryOp::Lt => (None, Some(value)),
1105 BinaryOp::Le => (None, Some(value + f64::EPSILON)),
1106 BinaryOp::Gt => (Some(value + f64::EPSILON), None),
1107 BinaryOp::Ge => (Some(value), None),
1108 _ => return None,
1109 };
1110
1111 if let Some(ref histogram) = stats.histogram {
1113 return Some(histogram.range_selectivity(lower, upper));
1114 }
1115
1116 if let (Some(min), Some(max)) = (stats.min_value, stats.max_value) {
1118 let range = max - min;
1119 if range <= 0.0 {
1120 return Some(1.0);
1121 }
1122
1123 let effective_lower = lower.unwrap_or(min).max(min);
1124 let effective_upper = upper.unwrap_or(max).min(max);
1125 let overlap = (effective_upper - effective_lower).max(0.0);
1126 return Some((overlap / range).clamp(0.0, 1.0));
1127 }
1128
1129 None
1130 }
1131
1132 fn extract_column_and_value(
1137 &self,
1138 left: &LogicalExpression,
1139 right: &LogicalExpression,
1140 ) -> Option<(String, String, f64)> {
1141 if let Some(result) = self.try_extract_property_literal(left, right) {
1143 return Some(result);
1144 }
1145
1146 self.try_extract_property_literal(right, left)
1148 }
1149
1150 fn try_extract_property_literal(
1152 &self,
1153 property_expr: &LogicalExpression,
1154 literal_expr: &LogicalExpression,
1155 ) -> Option<(String, String, f64)> {
1156 let (variable, property) = match property_expr {
1158 LogicalExpression::Property { variable, property } => {
1159 (variable.clone(), property.clone())
1160 }
1161 _ => return None,
1162 };
1163
1164 let value = match literal_expr {
1166 LogicalExpression::Literal(grafeo_common::types::Value::Int64(n)) => *n as f64,
1167 LogicalExpression::Literal(grafeo_common::types::Value::Float64(f)) => *f,
1168 _ => return None,
1169 };
1170
1171 for label in self.table_stats.keys() {
1175 if let Some(stats) = self.table_stats.get(label)
1176 && stats.columns.contains_key(&property)
1177 {
1178 return Some((label.clone(), property, value));
1179 }
1180 }
1181
1182 Some((variable, property, value))
1184 }
1185
1186 fn estimate_unary_selectivity(&self, op: UnaryOp, _operand: &LogicalExpression) -> f64 {
1188 match op {
1189 UnaryOp::Not => 1.0 - self.default_selectivity,
1190 UnaryOp::IsNull => self.selectivity_config.is_null,
1191 UnaryOp::IsNotNull => self.selectivity_config.is_not_null,
1192 UnaryOp::Neg => 1.0, }
1194 }
1195
1196 fn get_column_stats(&self, label: &str, column: &str) -> Option<&ColumnStats> {
1198 self.table_stats.get(label)?.columns.get(column)
1199 }
1200}
1201
1202impl Default for CardinalityEstimator {
1203 fn default() -> Self {
1204 Self::new()
1205 }
1206}
1207
1208#[cfg(test)]
1209mod tests {
1210 use super::*;
1211 use crate::query::plan::{
1212 DistinctOp, ExpandDirection, ExpandOp, FilterOp, JoinCondition, NodeScanOp, PathMode,
1213 ProjectOp, Projection, ReturnItem, ReturnOp, SkipOp, SortKey, SortOp, SortOrder,
1214 };
1215 use grafeo_common::types::Value;
1216
1217 #[test]
1218 fn test_node_scan_with_stats() {
1219 let mut estimator = CardinalityEstimator::new();
1220 estimator.add_table_stats("Person", TableStats::new(5000));
1221
1222 let scan = LogicalOperator::NodeScan(NodeScanOp {
1223 variable: "n".to_string(),
1224 label: Some("Person".to_string()),
1225 input: None,
1226 });
1227
1228 let cardinality = estimator.estimate(&scan);
1229 assert!((cardinality - 5000.0).abs() < 0.001);
1230 }
1231
1232 #[test]
1233 fn test_filter_reduces_cardinality() {
1234 let mut estimator = CardinalityEstimator::new();
1235 estimator.add_table_stats("Person", TableStats::new(1000));
1236
1237 let filter = LogicalOperator::Filter(FilterOp {
1238 predicate: LogicalExpression::Binary {
1239 left: Box::new(LogicalExpression::Property {
1240 variable: "n".to_string(),
1241 property: "age".to_string(),
1242 }),
1243 op: BinaryOp::Eq,
1244 right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
1245 },
1246 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1247 variable: "n".to_string(),
1248 label: Some("Person".to_string()),
1249 input: None,
1250 })),
1251 pushdown_hint: None,
1252 });
1253
1254 let cardinality = estimator.estimate(&filter);
1255 assert!(cardinality < 1000.0);
1257 assert!(cardinality >= 1.0);
1258 }
1259
1260 #[test]
1261 fn test_join_cardinality() {
1262 let mut estimator = CardinalityEstimator::new();
1263 estimator.add_table_stats("Person", TableStats::new(1000));
1264 estimator.add_table_stats("Company", TableStats::new(100));
1265
1266 let join = LogicalOperator::Join(JoinOp {
1267 left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1268 variable: "p".to_string(),
1269 label: Some("Person".to_string()),
1270 input: None,
1271 })),
1272 right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1273 variable: "c".to_string(),
1274 label: Some("Company".to_string()),
1275 input: None,
1276 })),
1277 join_type: JoinType::Inner,
1278 conditions: vec![JoinCondition {
1279 left: LogicalExpression::Property {
1280 variable: "p".to_string(),
1281 property: "company_id".to_string(),
1282 },
1283 right: LogicalExpression::Property {
1284 variable: "c".to_string(),
1285 property: "id".to_string(),
1286 },
1287 }],
1288 });
1289
1290 let cardinality = estimator.estimate(&join);
1291 assert!(cardinality < 1000.0 * 100.0);
1293 }
1294
1295 #[test]
1296 fn test_limit_caps_cardinality() {
1297 let mut estimator = CardinalityEstimator::new();
1298 estimator.add_table_stats("Person", TableStats::new(1000));
1299
1300 let limit = LogicalOperator::Limit(LimitOp {
1301 count: 10.into(),
1302 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1303 variable: "n".to_string(),
1304 label: Some("Person".to_string()),
1305 input: None,
1306 })),
1307 });
1308
1309 let cardinality = estimator.estimate(&limit);
1310 assert!((cardinality - 10.0).abs() < 0.001);
1311 }
1312
1313 #[test]
1314 fn test_aggregate_reduces_cardinality() {
1315 let mut estimator = CardinalityEstimator::new();
1316 estimator.add_table_stats("Person", TableStats::new(1000));
1317
1318 let global_agg = LogicalOperator::Aggregate(AggregateOp {
1320 group_by: vec![],
1321 aggregates: vec![],
1322 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1323 variable: "n".to_string(),
1324 label: Some("Person".to_string()),
1325 input: None,
1326 })),
1327 having: None,
1328 });
1329
1330 let cardinality = estimator.estimate(&global_agg);
1331 assert!((cardinality - 1.0).abs() < 0.001);
1332
1333 let group_agg = LogicalOperator::Aggregate(AggregateOp {
1335 group_by: vec![LogicalExpression::Property {
1336 variable: "n".to_string(),
1337 property: "city".to_string(),
1338 }],
1339 aggregates: vec![],
1340 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1341 variable: "n".to_string(),
1342 label: Some("Person".to_string()),
1343 input: None,
1344 })),
1345 having: None,
1346 });
1347
1348 let cardinality = estimator.estimate(&group_agg);
1349 assert!(cardinality < 1000.0);
1351 }
1352
1353 #[test]
1354 fn test_node_scan_without_stats() {
1355 let estimator = CardinalityEstimator::new();
1356
1357 let scan = LogicalOperator::NodeScan(NodeScanOp {
1358 variable: "n".to_string(),
1359 label: Some("Unknown".to_string()),
1360 input: None,
1361 });
1362
1363 let cardinality = estimator.estimate(&scan);
1364 assert!((cardinality - 1000.0).abs() < 0.001);
1366 }
1367
1368 #[test]
1369 fn test_node_scan_no_label() {
1370 let estimator = CardinalityEstimator::new();
1371
1372 let scan = LogicalOperator::NodeScan(NodeScanOp {
1373 variable: "n".to_string(),
1374 label: None,
1375 input: None,
1376 });
1377
1378 let cardinality = estimator.estimate(&scan);
1379 assert!((cardinality - 1000.0).abs() < 0.001);
1381 }
1382
1383 #[test]
1384 fn test_filter_inequality_selectivity() {
1385 let mut estimator = CardinalityEstimator::new();
1386 estimator.add_table_stats("Person", TableStats::new(1000));
1387
1388 let filter = LogicalOperator::Filter(FilterOp {
1389 predicate: LogicalExpression::Binary {
1390 left: Box::new(LogicalExpression::Property {
1391 variable: "n".to_string(),
1392 property: "age".to_string(),
1393 }),
1394 op: BinaryOp::Ne,
1395 right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
1396 },
1397 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1398 variable: "n".to_string(),
1399 label: Some("Person".to_string()),
1400 input: None,
1401 })),
1402 pushdown_hint: None,
1403 });
1404
1405 let cardinality = estimator.estimate(&filter);
1406 assert!(cardinality > 900.0);
1408 }
1409
1410 #[test]
1411 fn test_filter_range_selectivity() {
1412 let mut estimator = CardinalityEstimator::new();
1413 estimator.add_table_stats("Person", TableStats::new(1000));
1414
1415 let filter = LogicalOperator::Filter(FilterOp {
1416 predicate: LogicalExpression::Binary {
1417 left: Box::new(LogicalExpression::Property {
1418 variable: "n".to_string(),
1419 property: "age".to_string(),
1420 }),
1421 op: BinaryOp::Gt,
1422 right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
1423 },
1424 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1425 variable: "n".to_string(),
1426 label: Some("Person".to_string()),
1427 input: None,
1428 })),
1429 pushdown_hint: None,
1430 });
1431
1432 let cardinality = estimator.estimate(&filter);
1433 assert!(cardinality < 500.0);
1435 assert!(cardinality > 100.0);
1436 }
1437
1438 #[test]
1439 fn test_filter_and_selectivity() {
1440 let mut estimator = CardinalityEstimator::new();
1441 estimator.add_table_stats("Person", TableStats::new(1000));
1442
1443 let filter = LogicalOperator::Filter(FilterOp {
1446 predicate: LogicalExpression::Binary {
1447 left: Box::new(LogicalExpression::Binary {
1448 left: Box::new(LogicalExpression::Property {
1449 variable: "n".to_string(),
1450 property: "city".to_string(),
1451 }),
1452 op: BinaryOp::Eq,
1453 right: Box::new(LogicalExpression::Literal(Value::String("NYC".into()))),
1454 }),
1455 op: BinaryOp::And,
1456 right: Box::new(LogicalExpression::Binary {
1457 left: Box::new(LogicalExpression::Property {
1458 variable: "n".to_string(),
1459 property: "age".to_string(),
1460 }),
1461 op: BinaryOp::Eq,
1462 right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
1463 }),
1464 },
1465 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1466 variable: "n".to_string(),
1467 label: Some("Person".to_string()),
1468 input: None,
1469 })),
1470 pushdown_hint: None,
1471 });
1472
1473 let cardinality = estimator.estimate(&filter);
1474 assert!(cardinality < 100.0);
1477 assert!(cardinality >= 1.0);
1478 }
1479
1480 #[test]
1481 fn test_filter_or_selectivity() {
1482 let mut estimator = CardinalityEstimator::new();
1483 estimator.add_table_stats("Person", TableStats::new(1000));
1484
1485 let filter = LogicalOperator::Filter(FilterOp {
1489 predicate: LogicalExpression::Binary {
1490 left: Box::new(LogicalExpression::Binary {
1491 left: Box::new(LogicalExpression::Property {
1492 variable: "n".to_string(),
1493 property: "city".to_string(),
1494 }),
1495 op: BinaryOp::Eq,
1496 right: Box::new(LogicalExpression::Literal(Value::String("NYC".into()))),
1497 }),
1498 op: BinaryOp::Or,
1499 right: Box::new(LogicalExpression::Binary {
1500 left: Box::new(LogicalExpression::Property {
1501 variable: "n".to_string(),
1502 property: "city".to_string(),
1503 }),
1504 op: BinaryOp::Eq,
1505 right: Box::new(LogicalExpression::Literal(Value::String("LA".into()))),
1506 }),
1507 },
1508 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1509 variable: "n".to_string(),
1510 label: Some("Person".to_string()),
1511 input: None,
1512 })),
1513 pushdown_hint: None,
1514 });
1515
1516 let cardinality = estimator.estimate(&filter);
1517 assert!(cardinality < 100.0);
1519 assert!(cardinality >= 1.0);
1520 }
1521
1522 #[test]
1523 fn test_filter_literal_true() {
1524 let mut estimator = CardinalityEstimator::new();
1525 estimator.add_table_stats("Person", TableStats::new(1000));
1526
1527 let filter = LogicalOperator::Filter(FilterOp {
1528 predicate: LogicalExpression::Literal(Value::Bool(true)),
1529 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1530 variable: "n".to_string(),
1531 label: Some("Person".to_string()),
1532 input: None,
1533 })),
1534 pushdown_hint: None,
1535 });
1536
1537 let cardinality = estimator.estimate(&filter);
1538 assert!((cardinality - 1000.0).abs() < 0.001);
1540 }
1541
1542 #[test]
1543 fn test_filter_literal_false() {
1544 let mut estimator = CardinalityEstimator::new();
1545 estimator.add_table_stats("Person", TableStats::new(1000));
1546
1547 let filter = LogicalOperator::Filter(FilterOp {
1548 predicate: LogicalExpression::Literal(Value::Bool(false)),
1549 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1550 variable: "n".to_string(),
1551 label: Some("Person".to_string()),
1552 input: None,
1553 })),
1554 pushdown_hint: None,
1555 });
1556
1557 let cardinality = estimator.estimate(&filter);
1558 assert!((cardinality - 1.0).abs() < 0.001);
1560 }
1561
1562 #[test]
1563 fn test_unary_not_selectivity() {
1564 let mut estimator = CardinalityEstimator::new();
1565 estimator.add_table_stats("Person", TableStats::new(1000));
1566
1567 let filter = LogicalOperator::Filter(FilterOp {
1568 predicate: LogicalExpression::Unary {
1569 op: UnaryOp::Not,
1570 operand: Box::new(LogicalExpression::Literal(Value::Bool(true))),
1571 },
1572 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1573 variable: "n".to_string(),
1574 label: Some("Person".to_string()),
1575 input: None,
1576 })),
1577 pushdown_hint: None,
1578 });
1579
1580 let cardinality = estimator.estimate(&filter);
1581 assert!(cardinality < 1000.0);
1583 }
1584
1585 #[test]
1586 fn test_unary_is_null_selectivity() {
1587 let mut estimator = CardinalityEstimator::new();
1588 estimator.add_table_stats("Person", TableStats::new(1000));
1589
1590 let filter = LogicalOperator::Filter(FilterOp {
1591 predicate: LogicalExpression::Unary {
1592 op: UnaryOp::IsNull,
1593 operand: Box::new(LogicalExpression::Variable("x".to_string())),
1594 },
1595 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1596 variable: "n".to_string(),
1597 label: Some("Person".to_string()),
1598 input: None,
1599 })),
1600 pushdown_hint: None,
1601 });
1602
1603 let cardinality = estimator.estimate(&filter);
1604 assert!(cardinality < 100.0);
1606 }
1607
1608 #[test]
1609 fn test_expand_cardinality() {
1610 let mut estimator = CardinalityEstimator::new();
1611 estimator.add_table_stats("Person", TableStats::new(100));
1612
1613 let expand = LogicalOperator::Expand(ExpandOp {
1614 from_variable: "a".to_string(),
1615 to_variable: "b".to_string(),
1616 edge_variable: None,
1617 direction: ExpandDirection::Outgoing,
1618 edge_types: vec![],
1619 min_hops: 1,
1620 max_hops: Some(1),
1621 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1622 variable: "a".to_string(),
1623 label: Some("Person".to_string()),
1624 input: None,
1625 })),
1626 path_alias: None,
1627 path_mode: PathMode::Walk,
1628 });
1629
1630 let cardinality = estimator.estimate(&expand);
1631 assert!(cardinality > 100.0);
1633 }
1634
1635 #[test]
1636 fn test_expand_with_edge_type_filter() {
1637 let mut estimator = CardinalityEstimator::new();
1638 estimator.add_table_stats("Person", TableStats::new(100));
1639
1640 let expand = LogicalOperator::Expand(ExpandOp {
1641 from_variable: "a".to_string(),
1642 to_variable: "b".to_string(),
1643 edge_variable: None,
1644 direction: ExpandDirection::Outgoing,
1645 edge_types: vec!["KNOWS".to_string()],
1646 min_hops: 1,
1647 max_hops: Some(1),
1648 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1649 variable: "a".to_string(),
1650 label: Some("Person".to_string()),
1651 input: None,
1652 })),
1653 path_alias: None,
1654 path_mode: PathMode::Walk,
1655 });
1656
1657 let cardinality = estimator.estimate(&expand);
1658 assert!(cardinality > 100.0);
1660 }
1661
1662 #[test]
1663 fn test_expand_variable_length() {
1664 let mut estimator = CardinalityEstimator::new();
1665 estimator.add_table_stats("Person", TableStats::new(100));
1666
1667 let expand = LogicalOperator::Expand(ExpandOp {
1668 from_variable: "a".to_string(),
1669 to_variable: "b".to_string(),
1670 edge_variable: None,
1671 direction: ExpandDirection::Outgoing,
1672 edge_types: vec![],
1673 min_hops: 1,
1674 max_hops: Some(3),
1675 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1676 variable: "a".to_string(),
1677 label: Some("Person".to_string()),
1678 input: None,
1679 })),
1680 path_alias: None,
1681 path_mode: PathMode::Walk,
1682 });
1683
1684 let cardinality = estimator.estimate(&expand);
1685 assert!(cardinality > 500.0);
1687 }
1688
1689 #[test]
1690 fn test_join_cross_product() {
1691 let mut estimator = CardinalityEstimator::new();
1692 estimator.add_table_stats("Person", TableStats::new(100));
1693 estimator.add_table_stats("Company", TableStats::new(50));
1694
1695 let join = LogicalOperator::Join(JoinOp {
1696 left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1697 variable: "p".to_string(),
1698 label: Some("Person".to_string()),
1699 input: None,
1700 })),
1701 right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1702 variable: "c".to_string(),
1703 label: Some("Company".to_string()),
1704 input: None,
1705 })),
1706 join_type: JoinType::Cross,
1707 conditions: vec![],
1708 });
1709
1710 let cardinality = estimator.estimate(&join);
1711 assert!((cardinality - 5000.0).abs() < 0.001);
1713 }
1714
1715 #[test]
1716 fn test_join_left_outer() {
1717 let mut estimator = CardinalityEstimator::new();
1718 estimator.add_table_stats("Person", TableStats::new(1000));
1719 estimator.add_table_stats("Company", TableStats::new(10));
1720
1721 let join = LogicalOperator::Join(JoinOp {
1722 left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1723 variable: "p".to_string(),
1724 label: Some("Person".to_string()),
1725 input: None,
1726 })),
1727 right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1728 variable: "c".to_string(),
1729 label: Some("Company".to_string()),
1730 input: None,
1731 })),
1732 join_type: JoinType::Left,
1733 conditions: vec![JoinCondition {
1734 left: LogicalExpression::Variable("p".to_string()),
1735 right: LogicalExpression::Variable("c".to_string()),
1736 }],
1737 });
1738
1739 let cardinality = estimator.estimate(&join);
1740 assert!(cardinality >= 1000.0);
1742 }
1743
1744 #[test]
1745 fn test_join_semi() {
1746 let mut estimator = CardinalityEstimator::new();
1747 estimator.add_table_stats("Person", TableStats::new(1000));
1748 estimator.add_table_stats("Company", TableStats::new(100));
1749
1750 let join = LogicalOperator::Join(JoinOp {
1751 left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1752 variable: "p".to_string(),
1753 label: Some("Person".to_string()),
1754 input: None,
1755 })),
1756 right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1757 variable: "c".to_string(),
1758 label: Some("Company".to_string()),
1759 input: None,
1760 })),
1761 join_type: JoinType::Semi,
1762 conditions: vec![],
1763 });
1764
1765 let cardinality = estimator.estimate(&join);
1766 assert!(cardinality <= 1000.0);
1768 }
1769
1770 #[test]
1771 fn test_join_anti() {
1772 let mut estimator = CardinalityEstimator::new();
1773 estimator.add_table_stats("Person", TableStats::new(1000));
1774 estimator.add_table_stats("Company", TableStats::new(100));
1775
1776 let join = LogicalOperator::Join(JoinOp {
1777 left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1778 variable: "p".to_string(),
1779 label: Some("Person".to_string()),
1780 input: None,
1781 })),
1782 right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1783 variable: "c".to_string(),
1784 label: Some("Company".to_string()),
1785 input: None,
1786 })),
1787 join_type: JoinType::Anti,
1788 conditions: vec![],
1789 });
1790
1791 let cardinality = estimator.estimate(&join);
1792 assert!(cardinality <= 1000.0);
1794 assert!(cardinality >= 1.0);
1795 }
1796
1797 #[test]
1798 fn test_project_preserves_cardinality() {
1799 let mut estimator = CardinalityEstimator::new();
1800 estimator.add_table_stats("Person", TableStats::new(1000));
1801
1802 let project = LogicalOperator::Project(ProjectOp {
1803 projections: vec![Projection {
1804 expression: LogicalExpression::Variable("n".to_string()),
1805 alias: None,
1806 }],
1807 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1808 variable: "n".to_string(),
1809 label: Some("Person".to_string()),
1810 input: None,
1811 })),
1812 pass_through_input: false,
1813 });
1814
1815 let cardinality = estimator.estimate(&project);
1816 assert!((cardinality - 1000.0).abs() < 0.001);
1817 }
1818
1819 #[test]
1820 fn test_sort_preserves_cardinality() {
1821 let mut estimator = CardinalityEstimator::new();
1822 estimator.add_table_stats("Person", TableStats::new(1000));
1823
1824 let sort = LogicalOperator::Sort(SortOp {
1825 keys: vec![SortKey {
1826 expression: LogicalExpression::Variable("n".to_string()),
1827 order: SortOrder::Ascending,
1828 nulls: None,
1829 }],
1830 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1831 variable: "n".to_string(),
1832 label: Some("Person".to_string()),
1833 input: None,
1834 })),
1835 });
1836
1837 let cardinality = estimator.estimate(&sort);
1838 assert!((cardinality - 1000.0).abs() < 0.001);
1839 }
1840
1841 #[test]
1842 fn test_distinct_reduces_cardinality() {
1843 let mut estimator = CardinalityEstimator::new();
1844 estimator.add_table_stats("Person", TableStats::new(1000));
1845
1846 let distinct = LogicalOperator::Distinct(DistinctOp {
1847 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1848 variable: "n".to_string(),
1849 label: Some("Person".to_string()),
1850 input: None,
1851 })),
1852 columns: None,
1853 });
1854
1855 let cardinality = estimator.estimate(&distinct);
1856 assert!((cardinality - 500.0).abs() < 0.001);
1858 }
1859
1860 #[test]
1861 fn test_skip_reduces_cardinality() {
1862 let mut estimator = CardinalityEstimator::new();
1863 estimator.add_table_stats("Person", TableStats::new(1000));
1864
1865 let skip = LogicalOperator::Skip(SkipOp {
1866 count: 100.into(),
1867 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1868 variable: "n".to_string(),
1869 label: Some("Person".to_string()),
1870 input: None,
1871 })),
1872 });
1873
1874 let cardinality = estimator.estimate(&skip);
1875 assert!((cardinality - 900.0).abs() < 0.001);
1876 }
1877
1878 #[test]
1879 fn test_return_preserves_cardinality() {
1880 let mut estimator = CardinalityEstimator::new();
1881 estimator.add_table_stats("Person", TableStats::new(1000));
1882
1883 let ret = LogicalOperator::Return(ReturnOp {
1884 items: vec![ReturnItem {
1885 expression: LogicalExpression::Variable("n".to_string()),
1886 alias: None,
1887 }],
1888 distinct: false,
1889 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1890 variable: "n".to_string(),
1891 label: Some("Person".to_string()),
1892 input: None,
1893 })),
1894 });
1895
1896 let cardinality = estimator.estimate(&ret);
1897 assert!((cardinality - 1000.0).abs() < 0.001);
1898 }
1899
1900 #[test]
1901 fn test_empty_cardinality() {
1902 let estimator = CardinalityEstimator::new();
1903 let cardinality = estimator.estimate(&LogicalOperator::Empty);
1904 assert!((cardinality).abs() < 0.001);
1905 }
1906
1907 #[test]
1908 fn test_table_stats_with_column() {
1909 let stats = TableStats::new(1000).with_column(
1910 "age",
1911 ColumnStats::new(50).with_nulls(10).with_range(0.0, 100.0),
1912 );
1913
1914 assert_eq!(stats.row_count, 1000);
1915 let col = stats.columns.get("age").unwrap();
1916 assert_eq!(col.distinct_count, 50);
1917 assert_eq!(col.null_count, 10);
1918 assert!((col.min_value.unwrap() - 0.0).abs() < 0.001);
1919 assert!((col.max_value.unwrap() - 100.0).abs() < 0.001);
1920 }
1921
1922 #[test]
1923 fn test_estimator_default() {
1924 let estimator = CardinalityEstimator::default();
1925 let scan = LogicalOperator::NodeScan(NodeScanOp {
1926 variable: "n".to_string(),
1927 label: None,
1928 input: None,
1929 });
1930 let cardinality = estimator.estimate(&scan);
1931 assert!((cardinality - 1000.0).abs() < 0.001);
1932 }
1933
1934 #[test]
1935 fn test_set_avg_fanout() {
1936 let mut estimator = CardinalityEstimator::new();
1937 estimator.add_table_stats("Person", TableStats::new(100));
1938 estimator.set_avg_fanout(5.0);
1939
1940 let expand = LogicalOperator::Expand(ExpandOp {
1941 from_variable: "a".to_string(),
1942 to_variable: "b".to_string(),
1943 edge_variable: None,
1944 direction: ExpandDirection::Outgoing,
1945 edge_types: vec![],
1946 min_hops: 1,
1947 max_hops: Some(1),
1948 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1949 variable: "a".to_string(),
1950 label: Some("Person".to_string()),
1951 input: None,
1952 })),
1953 path_alias: None,
1954 path_mode: PathMode::Walk,
1955 });
1956
1957 let cardinality = estimator.estimate(&expand);
1958 assert!((cardinality - 500.0).abs() < 0.001);
1960 }
1961
1962 #[test]
1963 fn test_multiple_group_by_keys_reduce_cardinality() {
1964 let mut estimator = CardinalityEstimator::new();
1968 estimator.add_table_stats("Person", TableStats::new(10000));
1969
1970 let single_group = LogicalOperator::Aggregate(AggregateOp {
1971 group_by: vec![LogicalExpression::Property {
1972 variable: "n".to_string(),
1973 property: "city".to_string(),
1974 }],
1975 aggregates: vec![],
1976 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1977 variable: "n".to_string(),
1978 label: Some("Person".to_string()),
1979 input: None,
1980 })),
1981 having: None,
1982 });
1983
1984 let multi_group = LogicalOperator::Aggregate(AggregateOp {
1985 group_by: vec![
1986 LogicalExpression::Property {
1987 variable: "n".to_string(),
1988 property: "city".to_string(),
1989 },
1990 LogicalExpression::Property {
1991 variable: "n".to_string(),
1992 property: "country".to_string(),
1993 },
1994 ],
1995 aggregates: vec![],
1996 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1997 variable: "n".to_string(),
1998 label: Some("Person".to_string()),
1999 input: None,
2000 })),
2001 having: None,
2002 });
2003
2004 let single_card = estimator.estimate(&single_group);
2005 let multi_card = estimator.estimate(&multi_group);
2006
2007 assert!(single_card < 10000.0);
2009 assert!(multi_card < 10000.0);
2010 assert!(single_card >= 1.0);
2012 assert!(multi_card >= 1.0);
2013 }
2014
2015 #[test]
2018 fn test_histogram_build_uniform() {
2019 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2021 let histogram = EquiDepthHistogram::build(&values, 10);
2022
2023 assert_eq!(histogram.num_buckets(), 10);
2024 assert_eq!(histogram.total_rows(), 100);
2025
2026 for bucket in histogram.buckets() {
2028 assert!(bucket.frequency >= 9 && bucket.frequency <= 11);
2029 }
2030 }
2031
2032 #[test]
2033 fn test_histogram_build_skewed() {
2034 let mut values: Vec<f64> = (0..80).map(|i| i as f64).collect();
2036 values.extend((0..20).map(|i| 1000.0 + i as f64));
2037 let histogram = EquiDepthHistogram::build(&values, 5);
2038
2039 assert_eq!(histogram.num_buckets(), 5);
2040 assert_eq!(histogram.total_rows(), 100);
2041
2042 for bucket in histogram.buckets() {
2044 assert!(bucket.frequency >= 18 && bucket.frequency <= 22);
2045 }
2046 }
2047
2048 #[test]
2049 fn test_histogram_range_selectivity_full() {
2050 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2051 let histogram = EquiDepthHistogram::build(&values, 10);
2052
2053 let selectivity = histogram.range_selectivity(None, None);
2055 assert!((selectivity - 1.0).abs() < 0.01);
2056 }
2057
2058 #[test]
2059 fn test_histogram_range_selectivity_half() {
2060 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2061 let histogram = EquiDepthHistogram::build(&values, 10);
2062
2063 let selectivity = histogram.range_selectivity(Some(50.0), None);
2065 assert!(selectivity > 0.4 && selectivity < 0.6);
2066 }
2067
2068 #[test]
2069 fn test_histogram_range_selectivity_quarter() {
2070 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2071 let histogram = EquiDepthHistogram::build(&values, 10);
2072
2073 let selectivity = histogram.range_selectivity(None, Some(25.0));
2075 assert!(selectivity > 0.2 && selectivity < 0.3);
2076 }
2077
2078 #[test]
2079 fn test_histogram_equality_selectivity() {
2080 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2081 let histogram = EquiDepthHistogram::build(&values, 10);
2082
2083 let selectivity = histogram.equality_selectivity(50.0);
2085 assert!(selectivity > 0.005 && selectivity < 0.02);
2086 }
2087
2088 #[test]
2089 fn test_histogram_empty() {
2090 let histogram = EquiDepthHistogram::build(&[], 10);
2091
2092 assert_eq!(histogram.num_buckets(), 0);
2093 assert_eq!(histogram.total_rows(), 0);
2094
2095 let selectivity = histogram.range_selectivity(Some(0.0), Some(100.0));
2097 assert!((selectivity - 0.33).abs() < 0.01);
2098 }
2099
2100 #[test]
2101 fn test_histogram_bucket_overlap() {
2102 let bucket = HistogramBucket::new(10.0, 20.0, 100, 10);
2103
2104 assert!((bucket.overlap_fraction(Some(10.0), Some(20.0)) - 1.0).abs() < 0.01);
2106
2107 assert!((bucket.overlap_fraction(Some(10.0), Some(15.0)) - 0.5).abs() < 0.01);
2109
2110 assert!((bucket.overlap_fraction(Some(15.0), Some(20.0)) - 0.5).abs() < 0.01);
2112
2113 assert!((bucket.overlap_fraction(Some(0.0), Some(5.0))).abs() < 0.01);
2115
2116 assert!((bucket.overlap_fraction(Some(25.0), Some(30.0))).abs() < 0.01);
2118 }
2119
2120 #[test]
2121 fn test_column_stats_from_values() {
2122 let values = vec![10.0, 20.0, 30.0, 40.0, 50.0, 20.0, 30.0, 40.0];
2123 let stats = ColumnStats::from_values(values, 4);
2124
2125 assert_eq!(stats.distinct_count, 5); assert!(stats.min_value.is_some());
2127 assert!((stats.min_value.unwrap() - 10.0).abs() < 0.01);
2128 assert!(stats.max_value.is_some());
2129 assert!((stats.max_value.unwrap() - 50.0).abs() < 0.01);
2130 assert!(stats.histogram.is_some());
2131 }
2132
2133 #[test]
2134 fn test_column_stats_with_histogram_builder() {
2135 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2136 let histogram = EquiDepthHistogram::build(&values, 10);
2137
2138 let stats = ColumnStats::new(100)
2139 .with_range(0.0, 99.0)
2140 .with_histogram(histogram);
2141
2142 assert!(stats.histogram.is_some());
2143 assert_eq!(stats.histogram.as_ref().unwrap().num_buckets(), 10);
2144 }
2145
2146 #[test]
2147 fn test_filter_with_histogram_stats() {
2148 let mut estimator = CardinalityEstimator::new();
2149
2150 let age_values: Vec<f64> = (18..80).map(|i| i as f64).collect();
2152 let histogram = EquiDepthHistogram::build(&age_values, 10);
2153 let age_stats = ColumnStats::new(62)
2154 .with_range(18.0, 79.0)
2155 .with_histogram(histogram);
2156
2157 estimator.add_table_stats(
2158 "Person",
2159 TableStats::new(1000).with_column("age", age_stats),
2160 );
2161
2162 let filter = LogicalOperator::Filter(FilterOp {
2165 predicate: LogicalExpression::Binary {
2166 left: Box::new(LogicalExpression::Property {
2167 variable: "n".to_string(),
2168 property: "age".to_string(),
2169 }),
2170 op: BinaryOp::Gt,
2171 right: Box::new(LogicalExpression::Literal(Value::Int64(50))),
2172 },
2173 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2174 variable: "n".to_string(),
2175 label: Some("Person".to_string()),
2176 input: None,
2177 })),
2178 pushdown_hint: None,
2179 });
2180
2181 let cardinality = estimator.estimate(&filter);
2182
2183 assert!(cardinality > 300.0 && cardinality < 600.0);
2186 }
2187
2188 #[test]
2189 fn test_filter_equality_with_histogram() {
2190 let mut estimator = CardinalityEstimator::new();
2191
2192 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2194 let histogram = EquiDepthHistogram::build(&values, 10);
2195 let stats = ColumnStats::new(100)
2196 .with_range(0.0, 99.0)
2197 .with_histogram(histogram);
2198
2199 estimator.add_table_stats("Data", TableStats::new(1000).with_column("value", stats));
2200
2201 let filter = LogicalOperator::Filter(FilterOp {
2203 predicate: LogicalExpression::Binary {
2204 left: Box::new(LogicalExpression::Property {
2205 variable: "d".to_string(),
2206 property: "value".to_string(),
2207 }),
2208 op: BinaryOp::Eq,
2209 right: Box::new(LogicalExpression::Literal(Value::Int64(50))),
2210 },
2211 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2212 variable: "d".to_string(),
2213 label: Some("Data".to_string()),
2214 input: None,
2215 })),
2216 pushdown_hint: None,
2217 });
2218
2219 let cardinality = estimator.estimate(&filter);
2220
2221 assert!((1.0..50.0).contains(&cardinality));
2224 }
2225
2226 #[test]
2227 fn test_histogram_min_max() {
2228 let values: Vec<f64> = vec![5.0, 10.0, 15.0, 20.0, 25.0];
2229 let histogram = EquiDepthHistogram::build(&values, 2);
2230
2231 assert_eq!(histogram.min_value(), Some(5.0));
2232 assert!(histogram.max_value().is_some());
2234 }
2235
2236 #[test]
2239 fn test_selectivity_config_defaults() {
2240 let config = SelectivityConfig::new();
2241 assert!((config.default - 0.1).abs() < f64::EPSILON);
2242 assert!((config.equality - 0.01).abs() < f64::EPSILON);
2243 assert!((config.inequality - 0.99).abs() < f64::EPSILON);
2244 assert!((config.range - 0.33).abs() < f64::EPSILON);
2245 assert!((config.string_ops - 0.1).abs() < f64::EPSILON);
2246 assert!((config.membership - 0.1).abs() < f64::EPSILON);
2247 assert!((config.is_null - 0.05).abs() < f64::EPSILON);
2248 assert!((config.is_not_null - 0.95).abs() < f64::EPSILON);
2249 assert!((config.distinct_fraction - 0.5).abs() < f64::EPSILON);
2250 }
2251
2252 #[test]
2253 fn test_custom_selectivity_config() {
2254 let config = SelectivityConfig {
2255 equality: 0.05,
2256 range: 0.25,
2257 ..SelectivityConfig::new()
2258 };
2259 let estimator = CardinalityEstimator::with_selectivity_config(config);
2260 assert!((estimator.selectivity_config().equality - 0.05).abs() < f64::EPSILON);
2261 assert!((estimator.selectivity_config().range - 0.25).abs() < f64::EPSILON);
2262 }
2263
2264 #[test]
2265 fn test_custom_selectivity_affects_estimation() {
2266 let mut default_est = CardinalityEstimator::new();
2268 default_est.add_table_stats("Person", TableStats::new(1000));
2269
2270 let filter = LogicalOperator::Filter(FilterOp {
2271 predicate: LogicalExpression::Binary {
2272 left: Box::new(LogicalExpression::Property {
2273 variable: "n".to_string(),
2274 property: "name".to_string(),
2275 }),
2276 op: BinaryOp::Eq,
2277 right: Box::new(LogicalExpression::Literal(Value::String("Alix".into()))),
2278 },
2279 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2280 variable: "n".to_string(),
2281 label: Some("Person".to_string()),
2282 input: None,
2283 })),
2284 pushdown_hint: None,
2285 });
2286
2287 let default_card = default_est.estimate(&filter);
2288
2289 let config = SelectivityConfig {
2291 equality: 0.2,
2292 ..SelectivityConfig::new()
2293 };
2294 let mut custom_est = CardinalityEstimator::with_selectivity_config(config);
2295 custom_est.add_table_stats("Person", TableStats::new(1000));
2296
2297 let custom_card = custom_est.estimate(&filter);
2298
2299 assert!(custom_card > default_card);
2300 assert!((custom_card - 200.0).abs() < 1.0);
2301 }
2302
2303 #[test]
2304 fn test_custom_range_selectivity() {
2305 let config = SelectivityConfig {
2306 range: 0.5,
2307 ..SelectivityConfig::new()
2308 };
2309 let mut estimator = CardinalityEstimator::with_selectivity_config(config);
2310 estimator.add_table_stats("Person", TableStats::new(1000));
2311
2312 let filter = LogicalOperator::Filter(FilterOp {
2313 predicate: LogicalExpression::Binary {
2314 left: Box::new(LogicalExpression::Property {
2315 variable: "n".to_string(),
2316 property: "age".to_string(),
2317 }),
2318 op: BinaryOp::Gt,
2319 right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
2320 },
2321 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2322 variable: "n".to_string(),
2323 label: Some("Person".to_string()),
2324 input: None,
2325 })),
2326 pushdown_hint: None,
2327 });
2328
2329 let cardinality = estimator.estimate(&filter);
2330 assert!((cardinality - 500.0).abs() < 1.0);
2332 }
2333
2334 #[test]
2335 fn test_custom_distinct_fraction() {
2336 let config = SelectivityConfig {
2337 distinct_fraction: 0.8,
2338 ..SelectivityConfig::new()
2339 };
2340 let mut estimator = CardinalityEstimator::with_selectivity_config(config);
2341 estimator.add_table_stats("Person", TableStats::new(1000));
2342
2343 let distinct = LogicalOperator::Distinct(DistinctOp {
2344 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2345 variable: "n".to_string(),
2346 label: Some("Person".to_string()),
2347 input: None,
2348 })),
2349 columns: None,
2350 });
2351
2352 let cardinality = estimator.estimate(&distinct);
2353 assert!((cardinality - 800.0).abs() < 1.0);
2355 }
2356
2357 #[test]
2360 fn test_estimation_log_basic() {
2361 let mut log = EstimationLog::new(10.0);
2362 log.record("NodeScan(Person)", 1000.0, 1200.0);
2363 log.record("Filter(age > 30)", 100.0, 90.0);
2364
2365 assert_eq!(log.entries().len(), 2);
2366 assert!(!log.should_replan()); }
2368
2369 #[test]
2370 fn test_estimation_log_triggers_replan() {
2371 let mut log = EstimationLog::new(10.0);
2372 log.record("NodeScan(Person)", 100.0, 5000.0); assert!(log.should_replan());
2375 }
2376
2377 #[test]
2378 fn test_estimation_log_overestimate_triggers_replan() {
2379 let mut log = EstimationLog::new(5.0);
2380 log.record("Filter", 1000.0, 100.0); assert!(log.should_replan()); }
2384
2385 #[test]
2386 fn test_estimation_entry_error_ratio() {
2387 let entry = EstimationEntry {
2388 operator: "test".into(),
2389 estimated: 100.0,
2390 actual: 200.0,
2391 };
2392 assert!((entry.error_ratio() - 2.0).abs() < f64::EPSILON);
2393
2394 let perfect = EstimationEntry {
2395 operator: "test".into(),
2396 estimated: 100.0,
2397 actual: 100.0,
2398 };
2399 assert!((perfect.error_ratio() - 1.0).abs() < f64::EPSILON);
2400
2401 let zero_est = EstimationEntry {
2402 operator: "test".into(),
2403 estimated: 0.0,
2404 actual: 0.0,
2405 };
2406 assert!((zero_est.error_ratio() - 1.0).abs() < f64::EPSILON);
2407 }
2408
2409 #[test]
2410 fn test_estimation_log_max_error_ratio() {
2411 let mut log = EstimationLog::new(10.0);
2412 log.record("A", 100.0, 300.0); log.record("B", 100.0, 50.0); log.record("C", 100.0, 100.0); assert!((log.max_error_ratio() - 3.0).abs() < f64::EPSILON);
2417 }
2418
2419 #[test]
2420 fn test_estimation_log_clear() {
2421 let mut log = EstimationLog::new(10.0);
2422 log.record("A", 100.0, 100.0);
2423 assert_eq!(log.entries().len(), 1);
2424
2425 log.clear();
2426 assert!(log.entries().is_empty());
2427 assert!(!log.should_replan());
2428 }
2429
2430 #[test]
2431 fn test_create_estimation_log() {
2432 let log = CardinalityEstimator::create_estimation_log();
2433 assert!(log.entries().is_empty());
2434 assert!(!log.should_replan());
2435 }
2436
2437 #[test]
2438 fn test_equality_selectivity_empty_histogram() {
2439 let hist = EquiDepthHistogram::new(vec![]);
2440 assert_eq!(hist.equality_selectivity(5.0), 0.01);
2442 }
2443
2444 #[test]
2445 fn test_equality_selectivity_value_in_bucket() {
2446 let values: Vec<f64> = (1..=10).map(|i| i as f64).collect();
2447 let hist = EquiDepthHistogram::build(&values, 2);
2448 let sel = hist.equality_selectivity(3.0);
2449 assert!(sel > 0.0);
2450 assert!(sel <= 1.0);
2451 }
2452
2453 #[test]
2454 fn test_equality_selectivity_value_outside_all_buckets() {
2455 let values: Vec<f64> = (1..=10).map(|i| i as f64).collect();
2456 let hist = EquiDepthHistogram::build(&values, 2);
2457 let sel = hist.equality_selectivity(9999.0);
2459 assert_eq!(sel, 0.001);
2460 }
2461
2462 #[test]
2463 fn test_histogram_min_max_empty() {
2464 let hist = EquiDepthHistogram::new(vec![]);
2465 assert_eq!(hist.min_value(), None);
2466 assert_eq!(hist.max_value(), None);
2467 }
2468
2469 #[test]
2470 fn test_histogram_min_max_single_bucket() {
2471 let hist = EquiDepthHistogram::new(vec![HistogramBucket::new(1.0, 10.0, 5, 5)]);
2472 assert_eq!(hist.min_value(), Some(1.0));
2473 assert_eq!(hist.max_value(), Some(10.0));
2474 }
2475
2476 #[test]
2477 fn test_histogram_min_max_multi_bucket() {
2478 let values = vec![1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0];
2479 let hist = EquiDepthHistogram::build(&values, 3);
2480 let min = hist.min_value().unwrap();
2481 let max = hist.max_value().unwrap();
2482 assert!((min - 1.0).abs() < 1e-9, "min should be 1.0, got {min}");
2483 assert!(max >= 20.0, "max should be >= last value, got {max}");
2484 }
2485
2486 #[test]
2487 fn test_count_and_conjuncts_single_expression() {
2488 use crate::query::plan::LogicalExpression;
2489 let expr = LogicalExpression::Literal(Value::Bool(true));
2490 assert_eq!(count_and_conjuncts(&expr), 1);
2491 }
2492
2493 #[test]
2494 fn test_count_and_conjuncts_flat_and() {
2495 use crate::query::plan::{BinaryOp, LogicalExpression};
2496 let expr = LogicalExpression::Binary {
2497 left: Box::new(LogicalExpression::Literal(Value::Bool(true))),
2498 op: BinaryOp::And,
2499 right: Box::new(LogicalExpression::Literal(Value::Bool(false))),
2500 };
2501 assert_eq!(count_and_conjuncts(&expr), 2);
2502 }
2503
2504 #[test]
2505 fn test_count_and_conjuncts_nested_and() {
2506 use crate::query::plan::{BinaryOp, LogicalExpression};
2507 let ab = LogicalExpression::Binary {
2508 left: Box::new(LogicalExpression::Literal(Value::Bool(true))),
2509 op: BinaryOp::And,
2510 right: Box::new(LogicalExpression::Literal(Value::Bool(false))),
2511 };
2512 let cd = LogicalExpression::Binary {
2513 left: Box::new(LogicalExpression::Literal(Value::Int64(1))),
2514 op: BinaryOp::And,
2515 right: Box::new(LogicalExpression::Literal(Value::Int64(2))),
2516 };
2517 let expr = LogicalExpression::Binary {
2518 left: Box::new(ab),
2519 op: BinaryOp::And,
2520 right: Box::new(cd),
2521 };
2522 assert_eq!(count_and_conjuncts(&expr), 4);
2523 }
2524
2525 #[test]
2526 fn test_count_distinct_empty() {
2527 assert_eq!(count_distinct(&[]), 0);
2528 }
2529
2530 #[test]
2531 fn test_count_distinct_all_unique() {
2532 assert_eq!(count_distinct(&[1.0, 2.0, 3.0, 4.0]), 4);
2533 }
2534
2535 #[test]
2536 fn test_count_distinct_with_duplicates() {
2537 assert_eq!(count_distinct(&[1.0, 1.0, 2.0, 2.0, 3.0]), 3);
2538 }
2539
2540 #[test]
2541 fn test_count_distinct_all_same() {
2542 assert_eq!(count_distinct(&[5.0, 5.0, 5.0]), 1);
2543 }
2544
2545 #[test]
2546 fn test_count_distinct_single_value() {
2547 assert_eq!(count_distinct(&[42.0]), 1);
2548 }
2549}