1use crate::query::plan::{
18 AggregateOp, BinaryOp, DistinctOp, ExpandOp, FilterOp, JoinOp, JoinType, LeftJoinOp, LimitOp,
19 LogicalExpression, LogicalOperator, MultiWayJoinOp, NodeScanOp, ProjectOp, SkipOp, SortOp,
20 TripleComponent, TripleScanOp, UnaryOp, VectorJoinOp, VectorScanOp,
21};
22use std::collections::HashMap;
23
24#[derive(Debug, Clone)]
30pub struct HistogramBucket {
31 pub lower_bound: f64,
33 pub upper_bound: f64,
35 pub frequency: u64,
37 pub distinct_count: u64,
39}
40
41impl HistogramBucket {
42 #[must_use]
44 pub fn new(lower_bound: f64, upper_bound: f64, frequency: u64, distinct_count: u64) -> Self {
45 Self {
46 lower_bound,
47 upper_bound,
48 frequency,
49 distinct_count,
50 }
51 }
52
53 #[must_use]
55 pub fn width(&self) -> f64 {
56 self.upper_bound - self.lower_bound
57 }
58
59 #[must_use]
61 pub fn contains(&self, value: f64) -> bool {
62 value >= self.lower_bound && value < self.upper_bound
63 }
64
65 #[must_use]
67 pub fn overlap_fraction(&self, lower: Option<f64>, upper: Option<f64>) -> f64 {
68 let effective_lower = lower.unwrap_or(self.lower_bound).max(self.lower_bound);
69 let effective_upper = upper.unwrap_or(self.upper_bound).min(self.upper_bound);
70
71 let bucket_width = self.width();
72 if bucket_width <= 0.0 {
73 return if effective_lower <= self.lower_bound && effective_upper >= self.upper_bound {
74 1.0
75 } else {
76 0.0
77 };
78 }
79
80 let overlap = (effective_upper - effective_lower).max(0.0);
81 (overlap / bucket_width).min(1.0)
82 }
83}
84
85#[derive(Debug, Clone)]
105pub struct EquiDepthHistogram {
106 buckets: Vec<HistogramBucket>,
108 total_rows: u64,
110}
111
112impl EquiDepthHistogram {
113 #[must_use]
115 pub fn new(buckets: Vec<HistogramBucket>) -> Self {
116 let total_rows = buckets.iter().map(|b| b.frequency).sum();
117 Self {
118 buckets,
119 total_rows,
120 }
121 }
122
123 #[must_use]
132 pub fn build(values: &[f64], num_buckets: usize) -> Self {
133 if values.is_empty() || num_buckets == 0 {
134 return Self {
135 buckets: Vec::new(),
136 total_rows: 0,
137 };
138 }
139
140 let num_buckets = num_buckets.min(values.len());
141 let rows_per_bucket = (values.len() + num_buckets - 1) / num_buckets;
142 let mut buckets = Vec::with_capacity(num_buckets);
143
144 let mut start_idx = 0;
145 while start_idx < values.len() {
146 let end_idx = (start_idx + rows_per_bucket).min(values.len());
147 let lower_bound = values[start_idx];
148 let upper_bound = if end_idx < values.len() {
149 values[end_idx]
150 } else {
151 values[end_idx - 1] + 1.0
153 };
154
155 let bucket_values = &values[start_idx..end_idx];
157 let distinct_count = count_distinct(bucket_values);
158
159 buckets.push(HistogramBucket::new(
160 lower_bound,
161 upper_bound,
162 (end_idx - start_idx) as u64,
163 distinct_count,
164 ));
165
166 start_idx = end_idx;
167 }
168
169 Self::new(buckets)
170 }
171
172 #[must_use]
174 pub fn num_buckets(&self) -> usize {
175 self.buckets.len()
176 }
177
178 #[must_use]
180 pub fn total_rows(&self) -> u64 {
181 self.total_rows
182 }
183
184 #[must_use]
186 pub fn buckets(&self) -> &[HistogramBucket] {
187 &self.buckets
188 }
189
190 #[must_use]
199 pub fn range_selectivity(&self, lower: Option<f64>, upper: Option<f64>) -> f64 {
200 if self.buckets.is_empty() || self.total_rows == 0 {
201 return 0.33; }
203
204 let mut matching_rows = 0.0;
205
206 for bucket in &self.buckets {
207 let bucket_lower = bucket.lower_bound;
209 let bucket_upper = bucket.upper_bound;
210
211 if let Some(l) = lower
213 && bucket_upper <= l
214 {
215 continue;
216 }
217 if let Some(u) = upper
218 && bucket_lower >= u
219 {
220 continue;
221 }
222
223 let overlap = bucket.overlap_fraction(lower, upper);
225 matching_rows += overlap * bucket.frequency as f64;
226 }
227
228 (matching_rows / self.total_rows as f64).clamp(0.0, 1.0)
229 }
230
231 #[must_use]
235 pub fn equality_selectivity(&self, value: f64) -> f64 {
236 if self.buckets.is_empty() || self.total_rows == 0 {
237 return 0.01; }
239
240 for bucket in &self.buckets {
242 if bucket.contains(value) {
243 if bucket.distinct_count > 0 {
245 return (bucket.frequency as f64
246 / bucket.distinct_count as f64
247 / self.total_rows as f64)
248 .min(1.0);
249 }
250 }
251 }
252
253 0.001
255 }
256
257 #[must_use]
259 pub fn min_value(&self) -> Option<f64> {
260 self.buckets.first().map(|b| b.lower_bound)
261 }
262
263 #[must_use]
265 pub fn max_value(&self) -> Option<f64> {
266 self.buckets.last().map(|b| b.upper_bound)
267 }
268}
269
270fn count_and_conjuncts(expr: &LogicalExpression) -> usize {
276 match expr {
277 LogicalExpression::Binary {
278 op: BinaryOp::And,
279 left,
280 right,
281 } => count_and_conjuncts(left) + count_and_conjuncts(right),
282 _ => 1,
283 }
284}
285
286fn count_distinct(sorted_values: &[f64]) -> u64 {
287 if sorted_values.is_empty() {
288 return 0;
289 }
290
291 let mut count = 1u64;
292 let mut prev = sorted_values[0];
293
294 for &val in &sorted_values[1..] {
295 if (val - prev).abs() > f64::EPSILON {
296 count += 1;
297 prev = val;
298 }
299 }
300
301 count
302}
303
304#[derive(Debug, Clone)]
306pub struct TableStats {
307 pub row_count: u64,
309 pub columns: HashMap<String, ColumnStats>,
311}
312
313impl TableStats {
314 #[must_use]
316 pub fn new(row_count: u64) -> Self {
317 Self {
318 row_count,
319 columns: HashMap::new(),
320 }
321 }
322
323 pub fn with_column(mut self, name: &str, stats: ColumnStats) -> Self {
325 self.columns.insert(name.to_string(), stats);
326 self
327 }
328}
329
330#[derive(Debug, Clone)]
332pub struct ColumnStats {
333 pub distinct_count: u64,
335 pub null_count: u64,
337 pub min_value: Option<f64>,
339 pub max_value: Option<f64>,
341 pub histogram: Option<EquiDepthHistogram>,
343}
344
345impl ColumnStats {
346 #[must_use]
348 pub fn new(distinct_count: u64) -> Self {
349 Self {
350 distinct_count,
351 null_count: 0,
352 min_value: None,
353 max_value: None,
354 histogram: None,
355 }
356 }
357
358 #[must_use]
360 pub fn with_nulls(mut self, null_count: u64) -> Self {
361 self.null_count = null_count;
362 self
363 }
364
365 #[must_use]
367 pub fn with_range(mut self, min: f64, max: f64) -> Self {
368 self.min_value = Some(min);
369 self.max_value = Some(max);
370 self
371 }
372
373 #[must_use]
375 pub fn with_histogram(mut self, histogram: EquiDepthHistogram) -> Self {
376 self.histogram = Some(histogram);
377 self
378 }
379
380 #[must_use]
388 pub fn from_values(mut values: Vec<f64>, num_buckets: usize) -> Self {
389 if values.is_empty() {
390 return Self::new(0);
391 }
392
393 values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
395
396 let min = values.first().copied();
397 let max = values.last().copied();
398 let distinct_count = count_distinct(&values);
399 let histogram = EquiDepthHistogram::build(&values, num_buckets);
400
401 Self {
402 distinct_count,
403 null_count: 0,
404 min_value: min,
405 max_value: max,
406 histogram: Some(histogram),
407 }
408 }
409}
410
411#[derive(Debug, Clone)]
417pub struct SelectivityConfig {
418 pub default: f64,
420 pub equality: f64,
422 pub inequality: f64,
424 pub range: f64,
426 pub string_ops: f64,
428 pub membership: f64,
430 pub is_null: f64,
432 pub is_not_null: f64,
434 pub distinct_fraction: f64,
436}
437
438impl SelectivityConfig {
439 #[must_use]
441 pub fn new() -> Self {
442 Self {
443 default: 0.1,
444 equality: 0.01,
445 inequality: 0.99,
446 range: 0.33,
447 string_ops: 0.1,
448 membership: 0.1,
449 is_null: 0.05,
450 is_not_null: 0.95,
451 distinct_fraction: 0.5,
452 }
453 }
454}
455
456impl Default for SelectivityConfig {
457 fn default() -> Self {
458 Self::new()
459 }
460}
461
462#[derive(Debug, Clone)]
464pub struct EstimationEntry {
465 pub operator: String,
467 pub estimated: f64,
469 pub actual: f64,
471}
472
473impl EstimationEntry {
474 #[must_use]
480 pub fn error_ratio(&self) -> f64 {
481 if self.estimated.abs() < f64::EPSILON {
482 if self.actual.abs() < f64::EPSILON {
483 1.0
484 } else {
485 f64::INFINITY
486 }
487 } else {
488 self.actual / self.estimated
489 }
490 }
491}
492
493#[derive(Debug, Clone, Default)]
500pub struct EstimationLog {
501 entries: Vec<EstimationEntry>,
503 replan_threshold: f64,
507}
508
509impl EstimationLog {
510 #[must_use]
512 pub fn new(replan_threshold: f64) -> Self {
513 Self {
514 entries: Vec::new(),
515 replan_threshold,
516 }
517 }
518
519 pub fn record(&mut self, operator: impl Into<String>, estimated: f64, actual: f64) {
521 self.entries.push(EstimationEntry {
522 operator: operator.into(),
523 estimated,
524 actual,
525 });
526 }
527
528 #[must_use]
530 pub fn entries(&self) -> &[EstimationEntry] {
531 &self.entries
532 }
533
534 #[must_use]
537 pub fn should_replan(&self) -> bool {
538 self.entries.iter().any(|e| {
539 let ratio = e.error_ratio();
540 ratio > self.replan_threshold || ratio < 1.0 / self.replan_threshold
541 })
542 }
543
544 #[must_use]
546 pub fn max_error_ratio(&self) -> f64 {
547 self.entries
548 .iter()
549 .map(|e| {
550 let r = e.error_ratio();
551 if r < 1.0 { 1.0 / r } else { r }
553 })
554 .fold(1.0_f64, f64::max)
555 }
556
557 pub fn clear(&mut self) {
559 self.entries.clear();
560 }
561}
562
563pub struct CardinalityEstimator {
565 table_stats: HashMap<String, TableStats>,
567 default_row_count: u64,
569 default_selectivity: f64,
571 avg_fanout: f64,
573 selectivity_config: SelectivityConfig,
575 rdf_statistics: Option<grafeo_core::statistics::RdfStatistics>,
577}
578
579impl CardinalityEstimator {
580 #[must_use]
582 pub fn new() -> Self {
583 let config = SelectivityConfig::new();
584 Self {
585 table_stats: HashMap::new(),
586 default_row_count: 1000,
587 default_selectivity: config.default,
588 avg_fanout: 10.0,
589 selectivity_config: config,
590 rdf_statistics: None,
591 }
592 }
593
594 #[must_use]
596 pub fn with_selectivity_config(config: SelectivityConfig) -> Self {
597 Self {
598 table_stats: HashMap::new(),
599 default_row_count: 1000,
600 default_selectivity: config.default,
601 avg_fanout: 10.0,
602 selectivity_config: config,
603 rdf_statistics: None,
604 }
605 }
606
607 #[must_use]
609 pub fn selectivity_config(&self) -> &SelectivityConfig {
610 &self.selectivity_config
611 }
612
613 #[must_use]
615 pub fn create_estimation_log() -> EstimationLog {
616 EstimationLog::new(10.0)
617 }
618
619 #[must_use]
625 pub fn from_statistics(stats: &grafeo_core::statistics::Statistics) -> Self {
626 let mut estimator = Self::new();
627
628 if stats.total_nodes > 0 {
630 estimator.default_row_count = stats.total_nodes;
631 }
632
633 for (label, label_stats) in &stats.labels {
635 let mut table_stats = TableStats::new(label_stats.node_count);
636
637 for (prop, col_stats) in &label_stats.properties {
639 let optimizer_col =
640 ColumnStats::new(col_stats.distinct_count).with_nulls(col_stats.null_count);
641 table_stats = table_stats.with_column(prop, optimizer_col);
642 }
643
644 estimator.add_table_stats(label, table_stats);
645 }
646
647 if !stats.edge_types.is_empty() {
649 let total_out_degree: f64 = stats.edge_types.values().map(|e| e.avg_out_degree).sum();
650 estimator.avg_fanout = total_out_degree / stats.edge_types.len() as f64;
651 } else if stats.total_nodes > 0 {
652 estimator.avg_fanout = stats.total_edges as f64 / stats.total_nodes as f64;
653 }
654
655 if estimator.avg_fanout < 1.0 {
657 estimator.avg_fanout = 1.0;
658 }
659
660 estimator
661 }
662
663 #[must_use]
668 pub fn from_rdf_statistics(rdf_stats: grafeo_core::statistics::RdfStatistics) -> Self {
669 let mut estimator = Self::new();
670 if rdf_stats.total_triples > 0 {
671 estimator.default_row_count = rdf_stats.total_triples;
672 }
673 estimator.rdf_statistics = Some(rdf_stats);
674 estimator
675 }
676
677 pub fn add_table_stats(&mut self, name: &str, stats: TableStats) {
679 self.table_stats.insert(name.to_string(), stats);
680 }
681
682 pub fn set_avg_fanout(&mut self, fanout: f64) {
684 self.avg_fanout = fanout;
685 }
686
687 #[must_use]
689 pub fn estimate(&self, op: &LogicalOperator) -> f64 {
690 match op {
691 LogicalOperator::NodeScan(scan) => self.estimate_node_scan(scan),
692 LogicalOperator::Filter(filter) => self.estimate_filter(filter),
693 LogicalOperator::Project(project) => self.estimate_project(project),
694 LogicalOperator::Expand(expand) => self.estimate_expand(expand),
695 LogicalOperator::Join(join) => self.estimate_join(join),
696 LogicalOperator::Aggregate(agg) => self.estimate_aggregate(agg),
697 LogicalOperator::Sort(sort) => self.estimate_sort(sort),
698 LogicalOperator::Distinct(distinct) => self.estimate_distinct(distinct),
699 LogicalOperator::Limit(limit) => self.estimate_limit(limit),
700 LogicalOperator::Skip(skip) => self.estimate_skip(skip),
701 LogicalOperator::Return(ret) => self.estimate(&ret.input),
702 LogicalOperator::Empty => 0.0,
703 LogicalOperator::VectorScan(scan) => self.estimate_vector_scan(scan),
704 LogicalOperator::VectorJoin(join) => self.estimate_vector_join(join),
705 LogicalOperator::MultiWayJoin(mwj) => self.estimate_multi_way_join(mwj),
706 LogicalOperator::LeftJoin(lj) => self.estimate_left_join(lj),
707 LogicalOperator::TripleScan(scan) => self.estimate_triple_scan(scan),
708 _ => self.default_row_count as f64,
709 }
710 }
711
712 fn estimate_node_scan(&self, scan: &NodeScanOp) -> f64 {
714 if let Some(label) = &scan.label
715 && let Some(stats) = self.table_stats.get(label)
716 {
717 return stats.row_count as f64;
718 }
719 self.default_row_count as f64
721 }
722
723 fn estimate_triple_scan(&self, scan: &TripleScanOp) -> f64 {
729 let base = if let Some(ref input) = scan.input {
732 self.estimate(input)
733 } else {
734 1.0
735 };
736
737 let Some(rdf_stats) = &self.rdf_statistics else {
738 return if scan.input.is_some() {
739 base * self.default_row_count as f64
740 } else {
741 self.default_row_count as f64
742 };
743 };
744
745 let subject_bound = matches!(
746 scan.subject,
747 TripleComponent::Iri(_) | TripleComponent::Literal(_)
748 );
749 let object_bound = matches!(
750 scan.object,
751 TripleComponent::Iri(_) | TripleComponent::Literal(_)
752 );
753 let predicate_iri = match &scan.predicate {
754 TripleComponent::Iri(iri) => Some(iri.as_str()),
755 _ => None,
756 };
757
758 let pattern_card = rdf_stats.estimate_triple_pattern_cardinality(
759 subject_bound,
760 predicate_iri,
761 object_bound,
762 );
763
764 if scan.input.is_some() {
765 let selectivity = if rdf_stats.total_triples > 0 {
767 pattern_card / rdf_stats.total_triples as f64
768 } else {
769 1.0
770 };
771 (base * pattern_card * selectivity).max(1.0)
772 } else {
773 pattern_card.max(1.0)
774 }
775 }
776
777 fn estimate_filter(&self, filter: &FilterOp) -> f64 {
779 let input_cardinality = self.estimate(&filter.input);
780 let selectivity = self.estimate_selectivity(&filter.predicate);
781 (input_cardinality * selectivity).max(1.0)
782 }
783
784 fn estimate_project(&self, project: &ProjectOp) -> f64 {
786 self.estimate(&project.input)
787 }
788
789 fn estimate_expand(&self, expand: &ExpandOp) -> f64 {
791 let input_cardinality = self.estimate(&expand.input);
792
793 let fanout = if !expand.edge_types.is_empty() {
795 self.avg_fanout * 0.5
797 } else {
798 self.avg_fanout
799 };
800
801 let path_multiplier = if expand.max_hops.unwrap_or(1) > 1 {
803 let min = expand.min_hops as f64;
804 let max = expand.max_hops.unwrap_or(expand.min_hops + 3) as f64;
805 (fanout.powf(max + 1.0) - fanout.powf(min)) / (fanout - 1.0)
807 } else {
808 fanout
809 };
810
811 (input_cardinality * path_multiplier).max(1.0)
812 }
813
814 fn estimate_join(&self, join: &JoinOp) -> f64 {
816 let left_card = self.estimate(&join.left);
817 let right_card = self.estimate(&join.right);
818
819 match join.join_type {
820 JoinType::Cross => left_card * right_card,
821 JoinType::Inner => {
822 let selectivity = if join.conditions.is_empty() {
824 1.0 } else {
826 0.1_f64.powi(join.conditions.len() as i32)
828 };
829 (left_card * right_card * selectivity).max(1.0)
830 }
831 JoinType::Left => {
832 let inner_card = self.estimate_join(&JoinOp {
834 left: join.left.clone(),
835 right: join.right.clone(),
836 join_type: JoinType::Inner,
837 conditions: join.conditions.clone(),
838 });
839 inner_card.max(left_card)
840 }
841 JoinType::Right => {
842 let inner_card = self.estimate_join(&JoinOp {
844 left: join.left.clone(),
845 right: join.right.clone(),
846 join_type: JoinType::Inner,
847 conditions: join.conditions.clone(),
848 });
849 inner_card.max(right_card)
850 }
851 JoinType::Full => {
852 let inner_card = self.estimate_join(&JoinOp {
854 left: join.left.clone(),
855 right: join.right.clone(),
856 join_type: JoinType::Inner,
857 conditions: join.conditions.clone(),
858 });
859 inner_card.max(left_card.max(right_card))
860 }
861 JoinType::Semi => {
862 (left_card * self.default_selectivity).max(1.0)
864 }
865 JoinType::Anti => {
866 (left_card * (1.0 - self.default_selectivity)).max(1.0)
868 }
869 }
870 }
871
872 fn estimate_left_join(&self, lj: &LeftJoinOp) -> f64 {
881 let left_card = self.estimate(&lj.left);
882 let right_card = self.estimate(&lj.right);
883
884 let condition_selectivity = if let Some(cond) = &lj.condition {
887 let n = count_and_conjuncts(cond);
888 self.default_selectivity.powi(n as i32)
889 } else {
890 self.default_selectivity
891 };
892
893 let inner_estimate = left_card * right_card * condition_selectivity;
895 inner_estimate.max(left_card).max(1.0)
896 }
897
898 fn estimate_aggregate(&self, agg: &AggregateOp) -> f64 {
900 let input_cardinality = self.estimate(&agg.input);
901
902 if agg.group_by.is_empty() {
903 1.0
905 } else {
906 let group_reduction = 10.0_f64.powi(agg.group_by.len() as i32);
909 (input_cardinality / group_reduction).max(1.0)
910 }
911 }
912
913 fn estimate_sort(&self, sort: &SortOp) -> f64 {
915 self.estimate(&sort.input)
916 }
917
918 fn estimate_distinct(&self, distinct: &DistinctOp) -> f64 {
920 let input_cardinality = self.estimate(&distinct.input);
921 (input_cardinality * self.selectivity_config.distinct_fraction).max(1.0)
922 }
923
924 fn estimate_limit(&self, limit: &LimitOp) -> f64 {
926 let input_cardinality = self.estimate(&limit.input);
927 limit.count.estimate().min(input_cardinality)
928 }
929
930 fn estimate_skip(&self, skip: &SkipOp) -> f64 {
932 let input_cardinality = self.estimate(&skip.input);
933 (input_cardinality - skip.count.estimate()).max(0.0)
934 }
935
936 fn estimate_vector_scan(&self, scan: &VectorScanOp) -> f64 {
941 let base_k = scan.k as f64;
942
943 let selectivity = if scan.min_similarity.is_some() || scan.max_distance.is_some() {
945 0.7
947 } else {
948 1.0
949 };
950
951 (base_k * selectivity).max(1.0)
952 }
953
954 fn estimate_vector_join(&self, join: &VectorJoinOp) -> f64 {
958 let input_cardinality = self.estimate(&join.input);
959 let k = join.k as f64;
960
961 let selectivity = if join.min_similarity.is_some() || join.max_distance.is_some() {
963 0.7
964 } else {
965 1.0
966 };
967
968 (input_cardinality * k * selectivity).max(1.0)
969 }
970
971 fn estimate_multi_way_join(&self, mwj: &MultiWayJoinOp) -> f64 {
976 if mwj.inputs.is_empty() {
977 return 0.0;
978 }
979 let cardinalities: Vec<f64> = mwj
980 .inputs
981 .iter()
982 .map(|input| self.estimate(input))
983 .collect();
984 let min_card = cardinalities.iter().copied().fold(f64::INFINITY, f64::min);
985 let n = cardinalities.len() as f64;
986 (min_card.powf(n / 2.0)).max(1.0)
988 }
989
990 fn estimate_selectivity(&self, expr: &LogicalExpression) -> f64 {
992 match expr {
993 LogicalExpression::Binary { left, op, right } => {
994 self.estimate_binary_selectivity(left, *op, right)
995 }
996 LogicalExpression::Unary { op, operand } => {
997 self.estimate_unary_selectivity(*op, operand)
998 }
999 LogicalExpression::Literal(value) => {
1000 if let grafeo_common::types::Value::Bool(b) = value {
1002 if *b { 1.0 } else { 0.0 }
1003 } else {
1004 self.default_selectivity
1005 }
1006 }
1007 _ => self.default_selectivity,
1008 }
1009 }
1010
1011 fn estimate_binary_selectivity(
1013 &self,
1014 left: &LogicalExpression,
1015 op: BinaryOp,
1016 right: &LogicalExpression,
1017 ) -> f64 {
1018 match op {
1019 BinaryOp::Eq => {
1021 if let Some(selectivity) = self.try_equality_selectivity(left, right) {
1022 return selectivity;
1023 }
1024 self.selectivity_config.equality
1025 }
1026 BinaryOp::Ne => self.selectivity_config.inequality,
1028 BinaryOp::Lt | BinaryOp::Le | BinaryOp::Gt | BinaryOp::Ge => {
1030 if let Some(selectivity) = self.try_range_selectivity(left, op, right) {
1031 return selectivity;
1032 }
1033 self.selectivity_config.range
1034 }
1035 BinaryOp::And => {
1037 let left_sel = self.estimate_selectivity(left);
1038 let right_sel = self.estimate_selectivity(right);
1039 left_sel * right_sel
1041 }
1042 BinaryOp::Or => {
1043 let left_sel = self.estimate_selectivity(left);
1044 let right_sel = self.estimate_selectivity(right);
1045 (left_sel + right_sel - left_sel * right_sel).min(1.0)
1048 }
1049 BinaryOp::StartsWith | BinaryOp::EndsWith | BinaryOp::Contains | BinaryOp::Like => {
1051 self.selectivity_config.string_ops
1052 }
1053 BinaryOp::In => self.selectivity_config.membership,
1055 _ => self.default_selectivity,
1057 }
1058 }
1059
1060 fn try_equality_selectivity(
1062 &self,
1063 left: &LogicalExpression,
1064 right: &LogicalExpression,
1065 ) -> Option<f64> {
1066 let (label, column, value) = self.extract_column_and_value(left, right)?;
1068
1069 let stats = self.get_column_stats(&label, &column)?;
1071
1072 if let Some(ref histogram) = stats.histogram {
1074 return Some(histogram.equality_selectivity(value));
1075 }
1076
1077 if stats.distinct_count > 0 {
1079 return Some(1.0 / stats.distinct_count as f64);
1080 }
1081
1082 None
1083 }
1084
1085 fn try_range_selectivity(
1087 &self,
1088 left: &LogicalExpression,
1089 op: BinaryOp,
1090 right: &LogicalExpression,
1091 ) -> Option<f64> {
1092 let (label, column, value) = self.extract_column_and_value(left, right)?;
1094
1095 let stats = self.get_column_stats(&label, &column)?;
1097
1098 let (lower, upper) = match op {
1100 BinaryOp::Lt => (None, Some(value)),
1101 BinaryOp::Le => (None, Some(value + f64::EPSILON)),
1102 BinaryOp::Gt => (Some(value + f64::EPSILON), None),
1103 BinaryOp::Ge => (Some(value), None),
1104 _ => return None,
1105 };
1106
1107 if let Some(ref histogram) = stats.histogram {
1109 return Some(histogram.range_selectivity(lower, upper));
1110 }
1111
1112 if let (Some(min), Some(max)) = (stats.min_value, stats.max_value) {
1114 let range = max - min;
1115 if range <= 0.0 {
1116 return Some(1.0);
1117 }
1118
1119 let effective_lower = lower.unwrap_or(min).max(min);
1120 let effective_upper = upper.unwrap_or(max).min(max);
1121 let overlap = (effective_upper - effective_lower).max(0.0);
1122 return Some((overlap / range).clamp(0.0, 1.0));
1123 }
1124
1125 None
1126 }
1127
1128 fn extract_column_and_value(
1133 &self,
1134 left: &LogicalExpression,
1135 right: &LogicalExpression,
1136 ) -> Option<(String, String, f64)> {
1137 if let Some(result) = self.try_extract_property_literal(left, right) {
1139 return Some(result);
1140 }
1141
1142 self.try_extract_property_literal(right, left)
1144 }
1145
1146 fn try_extract_property_literal(
1148 &self,
1149 property_expr: &LogicalExpression,
1150 literal_expr: &LogicalExpression,
1151 ) -> Option<(String, String, f64)> {
1152 let (variable, property) = match property_expr {
1154 LogicalExpression::Property { variable, property } => {
1155 (variable.clone(), property.clone())
1156 }
1157 _ => return None,
1158 };
1159
1160 let value = match literal_expr {
1162 LogicalExpression::Literal(grafeo_common::types::Value::Int64(n)) => *n as f64,
1163 LogicalExpression::Literal(grafeo_common::types::Value::Float64(f)) => *f,
1164 _ => return None,
1165 };
1166
1167 for label in self.table_stats.keys() {
1171 if let Some(stats) = self.table_stats.get(label)
1172 && stats.columns.contains_key(&property)
1173 {
1174 return Some((label.clone(), property, value));
1175 }
1176 }
1177
1178 Some((variable, property, value))
1180 }
1181
1182 fn estimate_unary_selectivity(&self, op: UnaryOp, _operand: &LogicalExpression) -> f64 {
1184 match op {
1185 UnaryOp::Not => 1.0 - self.default_selectivity,
1186 UnaryOp::IsNull => self.selectivity_config.is_null,
1187 UnaryOp::IsNotNull => self.selectivity_config.is_not_null,
1188 UnaryOp::Neg => 1.0, }
1190 }
1191
1192 fn get_column_stats(&self, label: &str, column: &str) -> Option<&ColumnStats> {
1194 self.table_stats.get(label)?.columns.get(column)
1195 }
1196}
1197
1198impl Default for CardinalityEstimator {
1199 fn default() -> Self {
1200 Self::new()
1201 }
1202}
1203
1204#[cfg(test)]
1205mod tests {
1206 use super::*;
1207 use crate::query::plan::{
1208 DistinctOp, ExpandDirection, ExpandOp, FilterOp, JoinCondition, NodeScanOp, PathMode,
1209 ProjectOp, Projection, ReturnItem, ReturnOp, SkipOp, SortKey, SortOp, SortOrder,
1210 };
1211 use grafeo_common::types::Value;
1212
1213 #[test]
1214 fn test_node_scan_with_stats() {
1215 let mut estimator = CardinalityEstimator::new();
1216 estimator.add_table_stats("Person", TableStats::new(5000));
1217
1218 let scan = LogicalOperator::NodeScan(NodeScanOp {
1219 variable: "n".to_string(),
1220 label: Some("Person".to_string()),
1221 input: None,
1222 });
1223
1224 let cardinality = estimator.estimate(&scan);
1225 assert!((cardinality - 5000.0).abs() < 0.001);
1226 }
1227
1228 #[test]
1229 fn test_filter_reduces_cardinality() {
1230 let mut estimator = CardinalityEstimator::new();
1231 estimator.add_table_stats("Person", TableStats::new(1000));
1232
1233 let filter = LogicalOperator::Filter(FilterOp {
1234 predicate: LogicalExpression::Binary {
1235 left: Box::new(LogicalExpression::Property {
1236 variable: "n".to_string(),
1237 property: "age".to_string(),
1238 }),
1239 op: BinaryOp::Eq,
1240 right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
1241 },
1242 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1243 variable: "n".to_string(),
1244 label: Some("Person".to_string()),
1245 input: None,
1246 })),
1247 pushdown_hint: None,
1248 });
1249
1250 let cardinality = estimator.estimate(&filter);
1251 assert!(cardinality < 1000.0);
1253 assert!(cardinality >= 1.0);
1254 }
1255
1256 #[test]
1257 fn test_join_cardinality() {
1258 let mut estimator = CardinalityEstimator::new();
1259 estimator.add_table_stats("Person", TableStats::new(1000));
1260 estimator.add_table_stats("Company", TableStats::new(100));
1261
1262 let join = LogicalOperator::Join(JoinOp {
1263 left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1264 variable: "p".to_string(),
1265 label: Some("Person".to_string()),
1266 input: None,
1267 })),
1268 right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1269 variable: "c".to_string(),
1270 label: Some("Company".to_string()),
1271 input: None,
1272 })),
1273 join_type: JoinType::Inner,
1274 conditions: vec![JoinCondition {
1275 left: LogicalExpression::Property {
1276 variable: "p".to_string(),
1277 property: "company_id".to_string(),
1278 },
1279 right: LogicalExpression::Property {
1280 variable: "c".to_string(),
1281 property: "id".to_string(),
1282 },
1283 }],
1284 });
1285
1286 let cardinality = estimator.estimate(&join);
1287 assert!(cardinality < 1000.0 * 100.0);
1289 }
1290
1291 #[test]
1292 fn test_limit_caps_cardinality() {
1293 let mut estimator = CardinalityEstimator::new();
1294 estimator.add_table_stats("Person", TableStats::new(1000));
1295
1296 let limit = LogicalOperator::Limit(LimitOp {
1297 count: 10.into(),
1298 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1299 variable: "n".to_string(),
1300 label: Some("Person".to_string()),
1301 input: None,
1302 })),
1303 });
1304
1305 let cardinality = estimator.estimate(&limit);
1306 assert!((cardinality - 10.0).abs() < 0.001);
1307 }
1308
1309 #[test]
1310 fn test_aggregate_reduces_cardinality() {
1311 let mut estimator = CardinalityEstimator::new();
1312 estimator.add_table_stats("Person", TableStats::new(1000));
1313
1314 let global_agg = LogicalOperator::Aggregate(AggregateOp {
1316 group_by: vec![],
1317 aggregates: vec![],
1318 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1319 variable: "n".to_string(),
1320 label: Some("Person".to_string()),
1321 input: None,
1322 })),
1323 having: None,
1324 });
1325
1326 let cardinality = estimator.estimate(&global_agg);
1327 assert!((cardinality - 1.0).abs() < 0.001);
1328
1329 let group_agg = LogicalOperator::Aggregate(AggregateOp {
1331 group_by: vec![LogicalExpression::Property {
1332 variable: "n".to_string(),
1333 property: "city".to_string(),
1334 }],
1335 aggregates: vec![],
1336 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1337 variable: "n".to_string(),
1338 label: Some("Person".to_string()),
1339 input: None,
1340 })),
1341 having: None,
1342 });
1343
1344 let cardinality = estimator.estimate(&group_agg);
1345 assert!(cardinality < 1000.0);
1347 }
1348
1349 #[test]
1350 fn test_node_scan_without_stats() {
1351 let estimator = CardinalityEstimator::new();
1352
1353 let scan = LogicalOperator::NodeScan(NodeScanOp {
1354 variable: "n".to_string(),
1355 label: Some("Unknown".to_string()),
1356 input: None,
1357 });
1358
1359 let cardinality = estimator.estimate(&scan);
1360 assert!((cardinality - 1000.0).abs() < 0.001);
1362 }
1363
1364 #[test]
1365 fn test_node_scan_no_label() {
1366 let estimator = CardinalityEstimator::new();
1367
1368 let scan = LogicalOperator::NodeScan(NodeScanOp {
1369 variable: "n".to_string(),
1370 label: None,
1371 input: None,
1372 });
1373
1374 let cardinality = estimator.estimate(&scan);
1375 assert!((cardinality - 1000.0).abs() < 0.001);
1377 }
1378
1379 #[test]
1380 fn test_filter_inequality_selectivity() {
1381 let mut estimator = CardinalityEstimator::new();
1382 estimator.add_table_stats("Person", TableStats::new(1000));
1383
1384 let filter = LogicalOperator::Filter(FilterOp {
1385 predicate: LogicalExpression::Binary {
1386 left: Box::new(LogicalExpression::Property {
1387 variable: "n".to_string(),
1388 property: "age".to_string(),
1389 }),
1390 op: BinaryOp::Ne,
1391 right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
1392 },
1393 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1394 variable: "n".to_string(),
1395 label: Some("Person".to_string()),
1396 input: None,
1397 })),
1398 pushdown_hint: None,
1399 });
1400
1401 let cardinality = estimator.estimate(&filter);
1402 assert!(cardinality > 900.0);
1404 }
1405
1406 #[test]
1407 fn test_filter_range_selectivity() {
1408 let mut estimator = CardinalityEstimator::new();
1409 estimator.add_table_stats("Person", TableStats::new(1000));
1410
1411 let filter = LogicalOperator::Filter(FilterOp {
1412 predicate: LogicalExpression::Binary {
1413 left: Box::new(LogicalExpression::Property {
1414 variable: "n".to_string(),
1415 property: "age".to_string(),
1416 }),
1417 op: BinaryOp::Gt,
1418 right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
1419 },
1420 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1421 variable: "n".to_string(),
1422 label: Some("Person".to_string()),
1423 input: None,
1424 })),
1425 pushdown_hint: None,
1426 });
1427
1428 let cardinality = estimator.estimate(&filter);
1429 assert!(cardinality < 500.0);
1431 assert!(cardinality > 100.0);
1432 }
1433
1434 #[test]
1435 fn test_filter_and_selectivity() {
1436 let mut estimator = CardinalityEstimator::new();
1437 estimator.add_table_stats("Person", TableStats::new(1000));
1438
1439 let filter = LogicalOperator::Filter(FilterOp {
1442 predicate: LogicalExpression::Binary {
1443 left: Box::new(LogicalExpression::Binary {
1444 left: Box::new(LogicalExpression::Property {
1445 variable: "n".to_string(),
1446 property: "city".to_string(),
1447 }),
1448 op: BinaryOp::Eq,
1449 right: Box::new(LogicalExpression::Literal(Value::String("NYC".into()))),
1450 }),
1451 op: BinaryOp::And,
1452 right: Box::new(LogicalExpression::Binary {
1453 left: Box::new(LogicalExpression::Property {
1454 variable: "n".to_string(),
1455 property: "age".to_string(),
1456 }),
1457 op: BinaryOp::Eq,
1458 right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
1459 }),
1460 },
1461 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1462 variable: "n".to_string(),
1463 label: Some("Person".to_string()),
1464 input: None,
1465 })),
1466 pushdown_hint: None,
1467 });
1468
1469 let cardinality = estimator.estimate(&filter);
1470 assert!(cardinality < 100.0);
1473 assert!(cardinality >= 1.0);
1474 }
1475
1476 #[test]
1477 fn test_filter_or_selectivity() {
1478 let mut estimator = CardinalityEstimator::new();
1479 estimator.add_table_stats("Person", TableStats::new(1000));
1480
1481 let filter = LogicalOperator::Filter(FilterOp {
1485 predicate: LogicalExpression::Binary {
1486 left: Box::new(LogicalExpression::Binary {
1487 left: Box::new(LogicalExpression::Property {
1488 variable: "n".to_string(),
1489 property: "city".to_string(),
1490 }),
1491 op: BinaryOp::Eq,
1492 right: Box::new(LogicalExpression::Literal(Value::String("NYC".into()))),
1493 }),
1494 op: BinaryOp::Or,
1495 right: Box::new(LogicalExpression::Binary {
1496 left: Box::new(LogicalExpression::Property {
1497 variable: "n".to_string(),
1498 property: "city".to_string(),
1499 }),
1500 op: BinaryOp::Eq,
1501 right: Box::new(LogicalExpression::Literal(Value::String("LA".into()))),
1502 }),
1503 },
1504 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1505 variable: "n".to_string(),
1506 label: Some("Person".to_string()),
1507 input: None,
1508 })),
1509 pushdown_hint: None,
1510 });
1511
1512 let cardinality = estimator.estimate(&filter);
1513 assert!(cardinality < 100.0);
1515 assert!(cardinality >= 1.0);
1516 }
1517
1518 #[test]
1519 fn test_filter_literal_true() {
1520 let mut estimator = CardinalityEstimator::new();
1521 estimator.add_table_stats("Person", TableStats::new(1000));
1522
1523 let filter = LogicalOperator::Filter(FilterOp {
1524 predicate: LogicalExpression::Literal(Value::Bool(true)),
1525 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1526 variable: "n".to_string(),
1527 label: Some("Person".to_string()),
1528 input: None,
1529 })),
1530 pushdown_hint: None,
1531 });
1532
1533 let cardinality = estimator.estimate(&filter);
1534 assert!((cardinality - 1000.0).abs() < 0.001);
1536 }
1537
1538 #[test]
1539 fn test_filter_literal_false() {
1540 let mut estimator = CardinalityEstimator::new();
1541 estimator.add_table_stats("Person", TableStats::new(1000));
1542
1543 let filter = LogicalOperator::Filter(FilterOp {
1544 predicate: LogicalExpression::Literal(Value::Bool(false)),
1545 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1546 variable: "n".to_string(),
1547 label: Some("Person".to_string()),
1548 input: None,
1549 })),
1550 pushdown_hint: None,
1551 });
1552
1553 let cardinality = estimator.estimate(&filter);
1554 assert!((cardinality - 1.0).abs() < 0.001);
1556 }
1557
1558 #[test]
1559 fn test_unary_not_selectivity() {
1560 let mut estimator = CardinalityEstimator::new();
1561 estimator.add_table_stats("Person", TableStats::new(1000));
1562
1563 let filter = LogicalOperator::Filter(FilterOp {
1564 predicate: LogicalExpression::Unary {
1565 op: UnaryOp::Not,
1566 operand: Box::new(LogicalExpression::Literal(Value::Bool(true))),
1567 },
1568 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1569 variable: "n".to_string(),
1570 label: Some("Person".to_string()),
1571 input: None,
1572 })),
1573 pushdown_hint: None,
1574 });
1575
1576 let cardinality = estimator.estimate(&filter);
1577 assert!(cardinality < 1000.0);
1579 }
1580
1581 #[test]
1582 fn test_unary_is_null_selectivity() {
1583 let mut estimator = CardinalityEstimator::new();
1584 estimator.add_table_stats("Person", TableStats::new(1000));
1585
1586 let filter = LogicalOperator::Filter(FilterOp {
1587 predicate: LogicalExpression::Unary {
1588 op: UnaryOp::IsNull,
1589 operand: Box::new(LogicalExpression::Variable("x".to_string())),
1590 },
1591 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1592 variable: "n".to_string(),
1593 label: Some("Person".to_string()),
1594 input: None,
1595 })),
1596 pushdown_hint: None,
1597 });
1598
1599 let cardinality = estimator.estimate(&filter);
1600 assert!(cardinality < 100.0);
1602 }
1603
1604 #[test]
1605 fn test_expand_cardinality() {
1606 let mut estimator = CardinalityEstimator::new();
1607 estimator.add_table_stats("Person", TableStats::new(100));
1608
1609 let expand = LogicalOperator::Expand(ExpandOp {
1610 from_variable: "a".to_string(),
1611 to_variable: "b".to_string(),
1612 edge_variable: None,
1613 direction: ExpandDirection::Outgoing,
1614 edge_types: vec![],
1615 min_hops: 1,
1616 max_hops: Some(1),
1617 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1618 variable: "a".to_string(),
1619 label: Some("Person".to_string()),
1620 input: None,
1621 })),
1622 path_alias: None,
1623 path_mode: PathMode::Walk,
1624 });
1625
1626 let cardinality = estimator.estimate(&expand);
1627 assert!(cardinality > 100.0);
1629 }
1630
1631 #[test]
1632 fn test_expand_with_edge_type_filter() {
1633 let mut estimator = CardinalityEstimator::new();
1634 estimator.add_table_stats("Person", TableStats::new(100));
1635
1636 let expand = LogicalOperator::Expand(ExpandOp {
1637 from_variable: "a".to_string(),
1638 to_variable: "b".to_string(),
1639 edge_variable: None,
1640 direction: ExpandDirection::Outgoing,
1641 edge_types: vec!["KNOWS".to_string()],
1642 min_hops: 1,
1643 max_hops: Some(1),
1644 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1645 variable: "a".to_string(),
1646 label: Some("Person".to_string()),
1647 input: None,
1648 })),
1649 path_alias: None,
1650 path_mode: PathMode::Walk,
1651 });
1652
1653 let cardinality = estimator.estimate(&expand);
1654 assert!(cardinality > 100.0);
1656 }
1657
1658 #[test]
1659 fn test_expand_variable_length() {
1660 let mut estimator = CardinalityEstimator::new();
1661 estimator.add_table_stats("Person", TableStats::new(100));
1662
1663 let expand = LogicalOperator::Expand(ExpandOp {
1664 from_variable: "a".to_string(),
1665 to_variable: "b".to_string(),
1666 edge_variable: None,
1667 direction: ExpandDirection::Outgoing,
1668 edge_types: vec![],
1669 min_hops: 1,
1670 max_hops: Some(3),
1671 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1672 variable: "a".to_string(),
1673 label: Some("Person".to_string()),
1674 input: None,
1675 })),
1676 path_alias: None,
1677 path_mode: PathMode::Walk,
1678 });
1679
1680 let cardinality = estimator.estimate(&expand);
1681 assert!(cardinality > 500.0);
1683 }
1684
1685 #[test]
1686 fn test_join_cross_product() {
1687 let mut estimator = CardinalityEstimator::new();
1688 estimator.add_table_stats("Person", TableStats::new(100));
1689 estimator.add_table_stats("Company", TableStats::new(50));
1690
1691 let join = LogicalOperator::Join(JoinOp {
1692 left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1693 variable: "p".to_string(),
1694 label: Some("Person".to_string()),
1695 input: None,
1696 })),
1697 right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1698 variable: "c".to_string(),
1699 label: Some("Company".to_string()),
1700 input: None,
1701 })),
1702 join_type: JoinType::Cross,
1703 conditions: vec![],
1704 });
1705
1706 let cardinality = estimator.estimate(&join);
1707 assert!((cardinality - 5000.0).abs() < 0.001);
1709 }
1710
1711 #[test]
1712 fn test_join_left_outer() {
1713 let mut estimator = CardinalityEstimator::new();
1714 estimator.add_table_stats("Person", TableStats::new(1000));
1715 estimator.add_table_stats("Company", TableStats::new(10));
1716
1717 let join = LogicalOperator::Join(JoinOp {
1718 left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1719 variable: "p".to_string(),
1720 label: Some("Person".to_string()),
1721 input: None,
1722 })),
1723 right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1724 variable: "c".to_string(),
1725 label: Some("Company".to_string()),
1726 input: None,
1727 })),
1728 join_type: JoinType::Left,
1729 conditions: vec![JoinCondition {
1730 left: LogicalExpression::Variable("p".to_string()),
1731 right: LogicalExpression::Variable("c".to_string()),
1732 }],
1733 });
1734
1735 let cardinality = estimator.estimate(&join);
1736 assert!(cardinality >= 1000.0);
1738 }
1739
1740 #[test]
1741 fn test_join_semi() {
1742 let mut estimator = CardinalityEstimator::new();
1743 estimator.add_table_stats("Person", TableStats::new(1000));
1744 estimator.add_table_stats("Company", TableStats::new(100));
1745
1746 let join = LogicalOperator::Join(JoinOp {
1747 left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1748 variable: "p".to_string(),
1749 label: Some("Person".to_string()),
1750 input: None,
1751 })),
1752 right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1753 variable: "c".to_string(),
1754 label: Some("Company".to_string()),
1755 input: None,
1756 })),
1757 join_type: JoinType::Semi,
1758 conditions: vec![],
1759 });
1760
1761 let cardinality = estimator.estimate(&join);
1762 assert!(cardinality <= 1000.0);
1764 }
1765
1766 #[test]
1767 fn test_join_anti() {
1768 let mut estimator = CardinalityEstimator::new();
1769 estimator.add_table_stats("Person", TableStats::new(1000));
1770 estimator.add_table_stats("Company", TableStats::new(100));
1771
1772 let join = LogicalOperator::Join(JoinOp {
1773 left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1774 variable: "p".to_string(),
1775 label: Some("Person".to_string()),
1776 input: None,
1777 })),
1778 right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1779 variable: "c".to_string(),
1780 label: Some("Company".to_string()),
1781 input: None,
1782 })),
1783 join_type: JoinType::Anti,
1784 conditions: vec![],
1785 });
1786
1787 let cardinality = estimator.estimate(&join);
1788 assert!(cardinality <= 1000.0);
1790 assert!(cardinality >= 1.0);
1791 }
1792
1793 #[test]
1794 fn test_project_preserves_cardinality() {
1795 let mut estimator = CardinalityEstimator::new();
1796 estimator.add_table_stats("Person", TableStats::new(1000));
1797
1798 let project = LogicalOperator::Project(ProjectOp {
1799 projections: vec![Projection {
1800 expression: LogicalExpression::Variable("n".to_string()),
1801 alias: None,
1802 }],
1803 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1804 variable: "n".to_string(),
1805 label: Some("Person".to_string()),
1806 input: None,
1807 })),
1808 pass_through_input: false,
1809 });
1810
1811 let cardinality = estimator.estimate(&project);
1812 assert!((cardinality - 1000.0).abs() < 0.001);
1813 }
1814
1815 #[test]
1816 fn test_sort_preserves_cardinality() {
1817 let mut estimator = CardinalityEstimator::new();
1818 estimator.add_table_stats("Person", TableStats::new(1000));
1819
1820 let sort = LogicalOperator::Sort(SortOp {
1821 keys: vec![SortKey {
1822 expression: LogicalExpression::Variable("n".to_string()),
1823 order: SortOrder::Ascending,
1824 nulls: None,
1825 }],
1826 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1827 variable: "n".to_string(),
1828 label: Some("Person".to_string()),
1829 input: None,
1830 })),
1831 });
1832
1833 let cardinality = estimator.estimate(&sort);
1834 assert!((cardinality - 1000.0).abs() < 0.001);
1835 }
1836
1837 #[test]
1838 fn test_distinct_reduces_cardinality() {
1839 let mut estimator = CardinalityEstimator::new();
1840 estimator.add_table_stats("Person", TableStats::new(1000));
1841
1842 let distinct = LogicalOperator::Distinct(DistinctOp {
1843 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1844 variable: "n".to_string(),
1845 label: Some("Person".to_string()),
1846 input: None,
1847 })),
1848 columns: None,
1849 });
1850
1851 let cardinality = estimator.estimate(&distinct);
1852 assert!((cardinality - 500.0).abs() < 0.001);
1854 }
1855
1856 #[test]
1857 fn test_skip_reduces_cardinality() {
1858 let mut estimator = CardinalityEstimator::new();
1859 estimator.add_table_stats("Person", TableStats::new(1000));
1860
1861 let skip = LogicalOperator::Skip(SkipOp {
1862 count: 100.into(),
1863 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1864 variable: "n".to_string(),
1865 label: Some("Person".to_string()),
1866 input: None,
1867 })),
1868 });
1869
1870 let cardinality = estimator.estimate(&skip);
1871 assert!((cardinality - 900.0).abs() < 0.001);
1872 }
1873
1874 #[test]
1875 fn test_return_preserves_cardinality() {
1876 let mut estimator = CardinalityEstimator::new();
1877 estimator.add_table_stats("Person", TableStats::new(1000));
1878
1879 let ret = LogicalOperator::Return(ReturnOp {
1880 items: vec![ReturnItem {
1881 expression: LogicalExpression::Variable("n".to_string()),
1882 alias: None,
1883 }],
1884 distinct: false,
1885 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1886 variable: "n".to_string(),
1887 label: Some("Person".to_string()),
1888 input: None,
1889 })),
1890 });
1891
1892 let cardinality = estimator.estimate(&ret);
1893 assert!((cardinality - 1000.0).abs() < 0.001);
1894 }
1895
1896 #[test]
1897 fn test_empty_cardinality() {
1898 let estimator = CardinalityEstimator::new();
1899 let cardinality = estimator.estimate(&LogicalOperator::Empty);
1900 assert!((cardinality).abs() < 0.001);
1901 }
1902
1903 #[test]
1904 fn test_table_stats_with_column() {
1905 let stats = TableStats::new(1000).with_column(
1906 "age",
1907 ColumnStats::new(50).with_nulls(10).with_range(0.0, 100.0),
1908 );
1909
1910 assert_eq!(stats.row_count, 1000);
1911 let col = stats.columns.get("age").unwrap();
1912 assert_eq!(col.distinct_count, 50);
1913 assert_eq!(col.null_count, 10);
1914 assert!((col.min_value.unwrap() - 0.0).abs() < 0.001);
1915 assert!((col.max_value.unwrap() - 100.0).abs() < 0.001);
1916 }
1917
1918 #[test]
1919 fn test_estimator_default() {
1920 let estimator = CardinalityEstimator::default();
1921 let scan = LogicalOperator::NodeScan(NodeScanOp {
1922 variable: "n".to_string(),
1923 label: None,
1924 input: None,
1925 });
1926 let cardinality = estimator.estimate(&scan);
1927 assert!((cardinality - 1000.0).abs() < 0.001);
1928 }
1929
1930 #[test]
1931 fn test_set_avg_fanout() {
1932 let mut estimator = CardinalityEstimator::new();
1933 estimator.add_table_stats("Person", TableStats::new(100));
1934 estimator.set_avg_fanout(5.0);
1935
1936 let expand = LogicalOperator::Expand(ExpandOp {
1937 from_variable: "a".to_string(),
1938 to_variable: "b".to_string(),
1939 edge_variable: None,
1940 direction: ExpandDirection::Outgoing,
1941 edge_types: vec![],
1942 min_hops: 1,
1943 max_hops: Some(1),
1944 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1945 variable: "a".to_string(),
1946 label: Some("Person".to_string()),
1947 input: None,
1948 })),
1949 path_alias: None,
1950 path_mode: PathMode::Walk,
1951 });
1952
1953 let cardinality = estimator.estimate(&expand);
1954 assert!((cardinality - 500.0).abs() < 0.001);
1956 }
1957
1958 #[test]
1959 fn test_multiple_group_by_keys_reduce_cardinality() {
1960 let mut estimator = CardinalityEstimator::new();
1964 estimator.add_table_stats("Person", TableStats::new(10000));
1965
1966 let single_group = LogicalOperator::Aggregate(AggregateOp {
1967 group_by: vec![LogicalExpression::Property {
1968 variable: "n".to_string(),
1969 property: "city".to_string(),
1970 }],
1971 aggregates: vec![],
1972 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1973 variable: "n".to_string(),
1974 label: Some("Person".to_string()),
1975 input: None,
1976 })),
1977 having: None,
1978 });
1979
1980 let multi_group = LogicalOperator::Aggregate(AggregateOp {
1981 group_by: vec![
1982 LogicalExpression::Property {
1983 variable: "n".to_string(),
1984 property: "city".to_string(),
1985 },
1986 LogicalExpression::Property {
1987 variable: "n".to_string(),
1988 property: "country".to_string(),
1989 },
1990 ],
1991 aggregates: vec![],
1992 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1993 variable: "n".to_string(),
1994 label: Some("Person".to_string()),
1995 input: None,
1996 })),
1997 having: None,
1998 });
1999
2000 let single_card = estimator.estimate(&single_group);
2001 let multi_card = estimator.estimate(&multi_group);
2002
2003 assert!(single_card < 10000.0);
2005 assert!(multi_card < 10000.0);
2006 assert!(single_card >= 1.0);
2008 assert!(multi_card >= 1.0);
2009 }
2010
2011 #[test]
2014 fn test_histogram_build_uniform() {
2015 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2017 let histogram = EquiDepthHistogram::build(&values, 10);
2018
2019 assert_eq!(histogram.num_buckets(), 10);
2020 assert_eq!(histogram.total_rows(), 100);
2021
2022 for bucket in histogram.buckets() {
2024 assert!(bucket.frequency >= 9 && bucket.frequency <= 11);
2025 }
2026 }
2027
2028 #[test]
2029 fn test_histogram_build_skewed() {
2030 let mut values: Vec<f64> = (0..80).map(|i| i as f64).collect();
2032 values.extend((0..20).map(|i| 1000.0 + i as f64));
2033 let histogram = EquiDepthHistogram::build(&values, 5);
2034
2035 assert_eq!(histogram.num_buckets(), 5);
2036 assert_eq!(histogram.total_rows(), 100);
2037
2038 for bucket in histogram.buckets() {
2040 assert!(bucket.frequency >= 18 && bucket.frequency <= 22);
2041 }
2042 }
2043
2044 #[test]
2045 fn test_histogram_range_selectivity_full() {
2046 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2047 let histogram = EquiDepthHistogram::build(&values, 10);
2048
2049 let selectivity = histogram.range_selectivity(None, None);
2051 assert!((selectivity - 1.0).abs() < 0.01);
2052 }
2053
2054 #[test]
2055 fn test_histogram_range_selectivity_half() {
2056 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2057 let histogram = EquiDepthHistogram::build(&values, 10);
2058
2059 let selectivity = histogram.range_selectivity(Some(50.0), None);
2061 assert!(selectivity > 0.4 && selectivity < 0.6);
2062 }
2063
2064 #[test]
2065 fn test_histogram_range_selectivity_quarter() {
2066 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2067 let histogram = EquiDepthHistogram::build(&values, 10);
2068
2069 let selectivity = histogram.range_selectivity(None, Some(25.0));
2071 assert!(selectivity > 0.2 && selectivity < 0.3);
2072 }
2073
2074 #[test]
2075 fn test_histogram_equality_selectivity() {
2076 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2077 let histogram = EquiDepthHistogram::build(&values, 10);
2078
2079 let selectivity = histogram.equality_selectivity(50.0);
2081 assert!(selectivity > 0.005 && selectivity < 0.02);
2082 }
2083
2084 #[test]
2085 fn test_histogram_empty() {
2086 let histogram = EquiDepthHistogram::build(&[], 10);
2087
2088 assert_eq!(histogram.num_buckets(), 0);
2089 assert_eq!(histogram.total_rows(), 0);
2090
2091 let selectivity = histogram.range_selectivity(Some(0.0), Some(100.0));
2093 assert!((selectivity - 0.33).abs() < 0.01);
2094 }
2095
2096 #[test]
2097 fn test_histogram_bucket_overlap() {
2098 let bucket = HistogramBucket::new(10.0, 20.0, 100, 10);
2099
2100 assert!((bucket.overlap_fraction(Some(10.0), Some(20.0)) - 1.0).abs() < 0.01);
2102
2103 assert!((bucket.overlap_fraction(Some(10.0), Some(15.0)) - 0.5).abs() < 0.01);
2105
2106 assert!((bucket.overlap_fraction(Some(15.0), Some(20.0)) - 0.5).abs() < 0.01);
2108
2109 assert!((bucket.overlap_fraction(Some(0.0), Some(5.0))).abs() < 0.01);
2111
2112 assert!((bucket.overlap_fraction(Some(25.0), Some(30.0))).abs() < 0.01);
2114 }
2115
2116 #[test]
2117 fn test_column_stats_from_values() {
2118 let values = vec![10.0, 20.0, 30.0, 40.0, 50.0, 20.0, 30.0, 40.0];
2119 let stats = ColumnStats::from_values(values, 4);
2120
2121 assert_eq!(stats.distinct_count, 5); assert!(stats.min_value.is_some());
2123 assert!((stats.min_value.unwrap() - 10.0).abs() < 0.01);
2124 assert!(stats.max_value.is_some());
2125 assert!((stats.max_value.unwrap() - 50.0).abs() < 0.01);
2126 assert!(stats.histogram.is_some());
2127 }
2128
2129 #[test]
2130 fn test_column_stats_with_histogram_builder() {
2131 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2132 let histogram = EquiDepthHistogram::build(&values, 10);
2133
2134 let stats = ColumnStats::new(100)
2135 .with_range(0.0, 99.0)
2136 .with_histogram(histogram);
2137
2138 assert!(stats.histogram.is_some());
2139 assert_eq!(stats.histogram.as_ref().unwrap().num_buckets(), 10);
2140 }
2141
2142 #[test]
2143 fn test_filter_with_histogram_stats() {
2144 let mut estimator = CardinalityEstimator::new();
2145
2146 let age_values: Vec<f64> = (18..80).map(|i| i as f64).collect();
2148 let histogram = EquiDepthHistogram::build(&age_values, 10);
2149 let age_stats = ColumnStats::new(62)
2150 .with_range(18.0, 79.0)
2151 .with_histogram(histogram);
2152
2153 estimator.add_table_stats(
2154 "Person",
2155 TableStats::new(1000).with_column("age", age_stats),
2156 );
2157
2158 let filter = LogicalOperator::Filter(FilterOp {
2161 predicate: LogicalExpression::Binary {
2162 left: Box::new(LogicalExpression::Property {
2163 variable: "n".to_string(),
2164 property: "age".to_string(),
2165 }),
2166 op: BinaryOp::Gt,
2167 right: Box::new(LogicalExpression::Literal(Value::Int64(50))),
2168 },
2169 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2170 variable: "n".to_string(),
2171 label: Some("Person".to_string()),
2172 input: None,
2173 })),
2174 pushdown_hint: None,
2175 });
2176
2177 let cardinality = estimator.estimate(&filter);
2178
2179 assert!(cardinality > 300.0 && cardinality < 600.0);
2182 }
2183
2184 #[test]
2185 fn test_filter_equality_with_histogram() {
2186 let mut estimator = CardinalityEstimator::new();
2187
2188 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2190 let histogram = EquiDepthHistogram::build(&values, 10);
2191 let stats = ColumnStats::new(100)
2192 .with_range(0.0, 99.0)
2193 .with_histogram(histogram);
2194
2195 estimator.add_table_stats("Data", TableStats::new(1000).with_column("value", stats));
2196
2197 let filter = LogicalOperator::Filter(FilterOp {
2199 predicate: LogicalExpression::Binary {
2200 left: Box::new(LogicalExpression::Property {
2201 variable: "d".to_string(),
2202 property: "value".to_string(),
2203 }),
2204 op: BinaryOp::Eq,
2205 right: Box::new(LogicalExpression::Literal(Value::Int64(50))),
2206 },
2207 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2208 variable: "d".to_string(),
2209 label: Some("Data".to_string()),
2210 input: None,
2211 })),
2212 pushdown_hint: None,
2213 });
2214
2215 let cardinality = estimator.estimate(&filter);
2216
2217 assert!((1.0..50.0).contains(&cardinality));
2220 }
2221
2222 #[test]
2223 fn test_histogram_min_max() {
2224 let values: Vec<f64> = vec![5.0, 10.0, 15.0, 20.0, 25.0];
2225 let histogram = EquiDepthHistogram::build(&values, 2);
2226
2227 assert_eq!(histogram.min_value(), Some(5.0));
2228 assert!(histogram.max_value().is_some());
2230 }
2231
2232 #[test]
2235 fn test_selectivity_config_defaults() {
2236 let config = SelectivityConfig::new();
2237 assert!((config.default - 0.1).abs() < f64::EPSILON);
2238 assert!((config.equality - 0.01).abs() < f64::EPSILON);
2239 assert!((config.inequality - 0.99).abs() < f64::EPSILON);
2240 assert!((config.range - 0.33).abs() < f64::EPSILON);
2241 assert!((config.string_ops - 0.1).abs() < f64::EPSILON);
2242 assert!((config.membership - 0.1).abs() < f64::EPSILON);
2243 assert!((config.is_null - 0.05).abs() < f64::EPSILON);
2244 assert!((config.is_not_null - 0.95).abs() < f64::EPSILON);
2245 assert!((config.distinct_fraction - 0.5).abs() < f64::EPSILON);
2246 }
2247
2248 #[test]
2249 fn test_custom_selectivity_config() {
2250 let config = SelectivityConfig {
2251 equality: 0.05,
2252 range: 0.25,
2253 ..SelectivityConfig::new()
2254 };
2255 let estimator = CardinalityEstimator::with_selectivity_config(config);
2256 assert!((estimator.selectivity_config().equality - 0.05).abs() < f64::EPSILON);
2257 assert!((estimator.selectivity_config().range - 0.25).abs() < f64::EPSILON);
2258 }
2259
2260 #[test]
2261 fn test_custom_selectivity_affects_estimation() {
2262 let mut default_est = CardinalityEstimator::new();
2264 default_est.add_table_stats("Person", TableStats::new(1000));
2265
2266 let filter = LogicalOperator::Filter(FilterOp {
2267 predicate: LogicalExpression::Binary {
2268 left: Box::new(LogicalExpression::Property {
2269 variable: "n".to_string(),
2270 property: "name".to_string(),
2271 }),
2272 op: BinaryOp::Eq,
2273 right: Box::new(LogicalExpression::Literal(Value::String("Alix".into()))),
2274 },
2275 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2276 variable: "n".to_string(),
2277 label: Some("Person".to_string()),
2278 input: None,
2279 })),
2280 pushdown_hint: None,
2281 });
2282
2283 let default_card = default_est.estimate(&filter);
2284
2285 let config = SelectivityConfig {
2287 equality: 0.2,
2288 ..SelectivityConfig::new()
2289 };
2290 let mut custom_est = CardinalityEstimator::with_selectivity_config(config);
2291 custom_est.add_table_stats("Person", TableStats::new(1000));
2292
2293 let custom_card = custom_est.estimate(&filter);
2294
2295 assert!(custom_card > default_card);
2296 assert!((custom_card - 200.0).abs() < 1.0);
2297 }
2298
2299 #[test]
2300 fn test_custom_range_selectivity() {
2301 let config = SelectivityConfig {
2302 range: 0.5,
2303 ..SelectivityConfig::new()
2304 };
2305 let mut estimator = CardinalityEstimator::with_selectivity_config(config);
2306 estimator.add_table_stats("Person", TableStats::new(1000));
2307
2308 let filter = LogicalOperator::Filter(FilterOp {
2309 predicate: LogicalExpression::Binary {
2310 left: Box::new(LogicalExpression::Property {
2311 variable: "n".to_string(),
2312 property: "age".to_string(),
2313 }),
2314 op: BinaryOp::Gt,
2315 right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
2316 },
2317 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2318 variable: "n".to_string(),
2319 label: Some("Person".to_string()),
2320 input: None,
2321 })),
2322 pushdown_hint: None,
2323 });
2324
2325 let cardinality = estimator.estimate(&filter);
2326 assert!((cardinality - 500.0).abs() < 1.0);
2328 }
2329
2330 #[test]
2331 fn test_custom_distinct_fraction() {
2332 let config = SelectivityConfig {
2333 distinct_fraction: 0.8,
2334 ..SelectivityConfig::new()
2335 };
2336 let mut estimator = CardinalityEstimator::with_selectivity_config(config);
2337 estimator.add_table_stats("Person", TableStats::new(1000));
2338
2339 let distinct = LogicalOperator::Distinct(DistinctOp {
2340 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2341 variable: "n".to_string(),
2342 label: Some("Person".to_string()),
2343 input: None,
2344 })),
2345 columns: None,
2346 });
2347
2348 let cardinality = estimator.estimate(&distinct);
2349 assert!((cardinality - 800.0).abs() < 1.0);
2351 }
2352
2353 #[test]
2356 fn test_estimation_log_basic() {
2357 let mut log = EstimationLog::new(10.0);
2358 log.record("NodeScan(Person)", 1000.0, 1200.0);
2359 log.record("Filter(age > 30)", 100.0, 90.0);
2360
2361 assert_eq!(log.entries().len(), 2);
2362 assert!(!log.should_replan()); }
2364
2365 #[test]
2366 fn test_estimation_log_triggers_replan() {
2367 let mut log = EstimationLog::new(10.0);
2368 log.record("NodeScan(Person)", 100.0, 5000.0); assert!(log.should_replan());
2371 }
2372
2373 #[test]
2374 fn test_estimation_log_overestimate_triggers_replan() {
2375 let mut log = EstimationLog::new(5.0);
2376 log.record("Filter", 1000.0, 100.0); assert!(log.should_replan()); }
2380
2381 #[test]
2382 fn test_estimation_entry_error_ratio() {
2383 let entry = EstimationEntry {
2384 operator: "test".into(),
2385 estimated: 100.0,
2386 actual: 200.0,
2387 };
2388 assert!((entry.error_ratio() - 2.0).abs() < f64::EPSILON);
2389
2390 let perfect = EstimationEntry {
2391 operator: "test".into(),
2392 estimated: 100.0,
2393 actual: 100.0,
2394 };
2395 assert!((perfect.error_ratio() - 1.0).abs() < f64::EPSILON);
2396
2397 let zero_est = EstimationEntry {
2398 operator: "test".into(),
2399 estimated: 0.0,
2400 actual: 0.0,
2401 };
2402 assert!((zero_est.error_ratio() - 1.0).abs() < f64::EPSILON);
2403 }
2404
2405 #[test]
2406 fn test_estimation_log_max_error_ratio() {
2407 let mut log = EstimationLog::new(10.0);
2408 log.record("A", 100.0, 300.0); log.record("B", 100.0, 50.0); log.record("C", 100.0, 100.0); assert!((log.max_error_ratio() - 3.0).abs() < f64::EPSILON);
2413 }
2414
2415 #[test]
2416 fn test_estimation_log_clear() {
2417 let mut log = EstimationLog::new(10.0);
2418 log.record("A", 100.0, 100.0);
2419 assert_eq!(log.entries().len(), 1);
2420
2421 log.clear();
2422 assert!(log.entries().is_empty());
2423 assert!(!log.should_replan());
2424 }
2425
2426 #[test]
2427 fn test_create_estimation_log() {
2428 let log = CardinalityEstimator::create_estimation_log();
2429 assert!(log.entries().is_empty());
2430 assert!(!log.should_replan());
2431 }
2432
2433 #[test]
2434 fn test_equality_selectivity_empty_histogram() {
2435 let hist = EquiDepthHistogram::new(vec![]);
2436 assert_eq!(hist.equality_selectivity(5.0), 0.01);
2438 }
2439
2440 #[test]
2441 fn test_equality_selectivity_value_in_bucket() {
2442 let values: Vec<f64> = (1..=10).map(|i| i as f64).collect();
2443 let hist = EquiDepthHistogram::build(&values, 2);
2444 let sel = hist.equality_selectivity(3.0);
2445 assert!(sel > 0.0);
2446 assert!(sel <= 1.0);
2447 }
2448
2449 #[test]
2450 fn test_equality_selectivity_value_outside_all_buckets() {
2451 let values: Vec<f64> = (1..=10).map(|i| i as f64).collect();
2452 let hist = EquiDepthHistogram::build(&values, 2);
2453 let sel = hist.equality_selectivity(9999.0);
2455 assert_eq!(sel, 0.001);
2456 }
2457
2458 #[test]
2459 fn test_histogram_min_max_empty() {
2460 let hist = EquiDepthHistogram::new(vec![]);
2461 assert_eq!(hist.min_value(), None);
2462 assert_eq!(hist.max_value(), None);
2463 }
2464
2465 #[test]
2466 fn test_histogram_min_max_single_bucket() {
2467 let hist = EquiDepthHistogram::new(vec![HistogramBucket::new(1.0, 10.0, 5, 5)]);
2468 assert_eq!(hist.min_value(), Some(1.0));
2469 assert_eq!(hist.max_value(), Some(10.0));
2470 }
2471
2472 #[test]
2473 fn test_histogram_min_max_multi_bucket() {
2474 let values = vec![1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0];
2475 let hist = EquiDepthHistogram::build(&values, 3);
2476 let min = hist.min_value().unwrap();
2477 let max = hist.max_value().unwrap();
2478 assert!((min - 1.0).abs() < 1e-9, "min should be 1.0, got {min}");
2479 assert!(max >= 20.0, "max should be >= last value, got {max}");
2480 }
2481
2482 #[test]
2483 fn test_count_and_conjuncts_single_expression() {
2484 use crate::query::plan::LogicalExpression;
2485 let expr = LogicalExpression::Literal(Value::Bool(true));
2486 assert_eq!(count_and_conjuncts(&expr), 1);
2487 }
2488
2489 #[test]
2490 fn test_count_and_conjuncts_flat_and() {
2491 use crate::query::plan::{BinaryOp, LogicalExpression};
2492 let expr = LogicalExpression::Binary {
2493 left: Box::new(LogicalExpression::Literal(Value::Bool(true))),
2494 op: BinaryOp::And,
2495 right: Box::new(LogicalExpression::Literal(Value::Bool(false))),
2496 };
2497 assert_eq!(count_and_conjuncts(&expr), 2);
2498 }
2499
2500 #[test]
2501 fn test_count_and_conjuncts_nested_and() {
2502 use crate::query::plan::{BinaryOp, LogicalExpression};
2503 let ab = LogicalExpression::Binary {
2504 left: Box::new(LogicalExpression::Literal(Value::Bool(true))),
2505 op: BinaryOp::And,
2506 right: Box::new(LogicalExpression::Literal(Value::Bool(false))),
2507 };
2508 let cd = LogicalExpression::Binary {
2509 left: Box::new(LogicalExpression::Literal(Value::Int64(1))),
2510 op: BinaryOp::And,
2511 right: Box::new(LogicalExpression::Literal(Value::Int64(2))),
2512 };
2513 let expr = LogicalExpression::Binary {
2514 left: Box::new(ab),
2515 op: BinaryOp::And,
2516 right: Box::new(cd),
2517 };
2518 assert_eq!(count_and_conjuncts(&expr), 4);
2519 }
2520
2521 #[test]
2522 fn test_count_distinct_empty() {
2523 assert_eq!(count_distinct(&[]), 0);
2524 }
2525
2526 #[test]
2527 fn test_count_distinct_all_unique() {
2528 assert_eq!(count_distinct(&[1.0, 2.0, 3.0, 4.0]), 4);
2529 }
2530
2531 #[test]
2532 fn test_count_distinct_with_duplicates() {
2533 assert_eq!(count_distinct(&[1.0, 1.0, 2.0, 2.0, 3.0]), 3);
2534 }
2535
2536 #[test]
2537 fn test_count_distinct_all_same() {
2538 assert_eq!(count_distinct(&[5.0, 5.0, 5.0]), 1);
2539 }
2540
2541 #[test]
2542 fn test_count_distinct_single_value() {
2543 assert_eq!(count_distinct(&[42.0]), 1);
2544 }
2545}