1use crate::query::plan::{
18 AggregateOp, BinaryOp, DistinctOp, ExpandOp, FilterOp, JoinOp, JoinType, LeftJoinOp, LimitOp,
19 LogicalExpression, LogicalOperator, MultiWayJoinOp, NodeScanOp, ProjectOp, SkipOp, SortOp,
20 TripleComponent, TripleScanOp, UnaryOp, VectorJoinOp, VectorScanOp,
21};
22use std::collections::HashMap;
23
24#[derive(Debug, Clone)]
30pub struct HistogramBucket {
31 pub lower_bound: f64,
33 pub upper_bound: f64,
35 pub frequency: u64,
37 pub distinct_count: u64,
39}
40
41impl HistogramBucket {
42 #[must_use]
44 pub fn new(lower_bound: f64, upper_bound: f64, frequency: u64, distinct_count: u64) -> Self {
45 Self {
46 lower_bound,
47 upper_bound,
48 frequency,
49 distinct_count,
50 }
51 }
52
53 #[must_use]
55 pub fn width(&self) -> f64 {
56 self.upper_bound - self.lower_bound
57 }
58
59 #[must_use]
61 pub fn contains(&self, value: f64) -> bool {
62 value >= self.lower_bound && value < self.upper_bound
63 }
64
65 #[must_use]
67 pub fn overlap_fraction(&self, lower: Option<f64>, upper: Option<f64>) -> f64 {
68 let effective_lower = lower.unwrap_or(self.lower_bound).max(self.lower_bound);
69 let effective_upper = upper.unwrap_or(self.upper_bound).min(self.upper_bound);
70
71 let bucket_width = self.width();
72 if bucket_width <= 0.0 {
73 return if effective_lower <= self.lower_bound && effective_upper >= self.upper_bound {
74 1.0
75 } else {
76 0.0
77 };
78 }
79
80 let overlap = (effective_upper - effective_lower).max(0.0);
81 (overlap / bucket_width).min(1.0)
82 }
83}
84
85#[derive(Debug, Clone)]
105pub struct EquiDepthHistogram {
106 buckets: Vec<HistogramBucket>,
108 total_rows: u64,
110}
111
112impl EquiDepthHistogram {
113 #[must_use]
115 pub fn new(buckets: Vec<HistogramBucket>) -> Self {
116 let total_rows = buckets.iter().map(|b| b.frequency).sum();
117 Self {
118 buckets,
119 total_rows,
120 }
121 }
122
123 #[must_use]
132 pub fn build(values: &[f64], num_buckets: usize) -> Self {
133 if values.is_empty() || num_buckets == 0 {
134 return Self {
135 buckets: Vec::new(),
136 total_rows: 0,
137 };
138 }
139
140 let num_buckets = num_buckets.min(values.len());
141 let rows_per_bucket = (values.len() + num_buckets - 1) / num_buckets;
142 let mut buckets = Vec::with_capacity(num_buckets);
143
144 let mut start_idx = 0;
145 while start_idx < values.len() {
146 let end_idx = (start_idx + rows_per_bucket).min(values.len());
147 let lower_bound = values[start_idx];
148 let upper_bound = if end_idx < values.len() {
149 values[end_idx]
150 } else {
151 values[end_idx - 1] + 1.0
153 };
154
155 let bucket_values = &values[start_idx..end_idx];
157 let distinct_count = count_distinct(bucket_values);
158
159 buckets.push(HistogramBucket::new(
160 lower_bound,
161 upper_bound,
162 (end_idx - start_idx) as u64,
163 distinct_count,
164 ));
165
166 start_idx = end_idx;
167 }
168
169 Self::new(buckets)
170 }
171
172 #[must_use]
174 pub fn num_buckets(&self) -> usize {
175 self.buckets.len()
176 }
177
178 #[must_use]
180 pub fn total_rows(&self) -> u64 {
181 self.total_rows
182 }
183
184 #[must_use]
186 pub fn buckets(&self) -> &[HistogramBucket] {
187 &self.buckets
188 }
189
190 #[must_use]
199 pub fn range_selectivity(&self, lower: Option<f64>, upper: Option<f64>) -> f64 {
200 if self.buckets.is_empty() || self.total_rows == 0 {
201 return 0.33; }
203
204 let mut matching_rows = 0.0;
205
206 for bucket in &self.buckets {
207 let bucket_lower = bucket.lower_bound;
209 let bucket_upper = bucket.upper_bound;
210
211 if let Some(l) = lower
213 && bucket_upper <= l
214 {
215 continue;
216 }
217 if let Some(u) = upper
218 && bucket_lower >= u
219 {
220 continue;
221 }
222
223 let overlap = bucket.overlap_fraction(lower, upper);
225 matching_rows += overlap * bucket.frequency as f64;
226 }
227
228 (matching_rows / self.total_rows as f64).clamp(0.0, 1.0)
229 }
230
231 #[must_use]
235 pub fn equality_selectivity(&self, value: f64) -> f64 {
236 if self.buckets.is_empty() || self.total_rows == 0 {
237 return 0.01; }
239
240 for bucket in &self.buckets {
242 if bucket.contains(value) {
243 if bucket.distinct_count > 0 {
245 return (bucket.frequency as f64
246 / bucket.distinct_count as f64
247 / self.total_rows as f64)
248 .min(1.0);
249 }
250 }
251 }
252
253 0.001
255 }
256
257 #[must_use]
259 pub fn min_value(&self) -> Option<f64> {
260 self.buckets.first().map(|b| b.lower_bound)
261 }
262
263 #[must_use]
265 pub fn max_value(&self) -> Option<f64> {
266 self.buckets.last().map(|b| b.upper_bound)
267 }
268}
269
270fn count_and_conjuncts(expr: &LogicalExpression) -> usize {
276 match expr {
277 LogicalExpression::Binary {
278 op: BinaryOp::And,
279 left,
280 right,
281 } => count_and_conjuncts(left) + count_and_conjuncts(right),
282 _ => 1,
283 }
284}
285
286fn count_distinct(sorted_values: &[f64]) -> u64 {
287 if sorted_values.is_empty() {
288 return 0;
289 }
290
291 let mut count = 1u64;
292 let mut prev = sorted_values[0];
293
294 for &val in &sorted_values[1..] {
295 if (val - prev).abs() > f64::EPSILON {
296 count += 1;
297 prev = val;
298 }
299 }
300
301 count
302}
303
304#[derive(Debug, Clone)]
306pub struct TableStats {
307 pub row_count: u64,
309 pub columns: HashMap<String, ColumnStats>,
311}
312
313impl TableStats {
314 #[must_use]
316 pub fn new(row_count: u64) -> Self {
317 Self {
318 row_count,
319 columns: HashMap::new(),
320 }
321 }
322
323 pub fn with_column(mut self, name: &str, stats: ColumnStats) -> Self {
325 self.columns.insert(name.to_string(), stats);
326 self
327 }
328}
329
330#[derive(Debug, Clone)]
332pub struct ColumnStats {
333 pub distinct_count: u64,
335 pub null_count: u64,
337 pub min_value: Option<f64>,
339 pub max_value: Option<f64>,
341 pub histogram: Option<EquiDepthHistogram>,
343}
344
345impl ColumnStats {
346 #[must_use]
348 pub fn new(distinct_count: u64) -> Self {
349 Self {
350 distinct_count,
351 null_count: 0,
352 min_value: None,
353 max_value: None,
354 histogram: None,
355 }
356 }
357
358 #[must_use]
360 pub fn with_nulls(mut self, null_count: u64) -> Self {
361 self.null_count = null_count;
362 self
363 }
364
365 #[must_use]
367 pub fn with_range(mut self, min: f64, max: f64) -> Self {
368 self.min_value = Some(min);
369 self.max_value = Some(max);
370 self
371 }
372
373 #[must_use]
375 pub fn with_histogram(mut self, histogram: EquiDepthHistogram) -> Self {
376 self.histogram = Some(histogram);
377 self
378 }
379
380 #[must_use]
388 pub fn from_values(mut values: Vec<f64>, num_buckets: usize) -> Self {
389 if values.is_empty() {
390 return Self::new(0);
391 }
392
393 values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
395
396 let min = values.first().copied();
397 let max = values.last().copied();
398 let distinct_count = count_distinct(&values);
399 let histogram = EquiDepthHistogram::build(&values, num_buckets);
400
401 Self {
402 distinct_count,
403 null_count: 0,
404 min_value: min,
405 max_value: max,
406 histogram: Some(histogram),
407 }
408 }
409}
410
411#[derive(Debug, Clone)]
417pub struct SelectivityConfig {
418 pub default: f64,
420 pub equality: f64,
422 pub inequality: f64,
424 pub range: f64,
426 pub string_ops: f64,
428 pub membership: f64,
430 pub is_null: f64,
432 pub is_not_null: f64,
434 pub distinct_fraction: f64,
436}
437
438impl SelectivityConfig {
439 #[must_use]
441 pub fn new() -> Self {
442 Self {
443 default: 0.1,
444 equality: 0.01,
445 inequality: 0.99,
446 range: 0.33,
447 string_ops: 0.1,
448 membership: 0.1,
449 is_null: 0.05,
450 is_not_null: 0.95,
451 distinct_fraction: 0.5,
452 }
453 }
454}
455
456impl Default for SelectivityConfig {
457 fn default() -> Self {
458 Self::new()
459 }
460}
461
462#[derive(Debug, Clone)]
464pub struct EstimationEntry {
465 pub operator: String,
467 pub estimated: f64,
469 pub actual: f64,
471}
472
473impl EstimationEntry {
474 #[must_use]
480 pub fn error_ratio(&self) -> f64 {
481 if self.estimated.abs() < f64::EPSILON {
482 if self.actual.abs() < f64::EPSILON {
483 1.0
484 } else {
485 f64::INFINITY
486 }
487 } else {
488 self.actual / self.estimated
489 }
490 }
491}
492
493#[derive(Debug, Clone, Default)]
500pub struct EstimationLog {
501 entries: Vec<EstimationEntry>,
503 replan_threshold: f64,
507}
508
509impl EstimationLog {
510 #[must_use]
512 pub fn new(replan_threshold: f64) -> Self {
513 Self {
514 entries: Vec::new(),
515 replan_threshold,
516 }
517 }
518
519 pub fn record(&mut self, operator: impl Into<String>, estimated: f64, actual: f64) {
521 self.entries.push(EstimationEntry {
522 operator: operator.into(),
523 estimated,
524 actual,
525 });
526 }
527
528 #[must_use]
530 pub fn entries(&self) -> &[EstimationEntry] {
531 &self.entries
532 }
533
534 #[must_use]
537 pub fn should_replan(&self) -> bool {
538 self.entries.iter().any(|e| {
539 let ratio = e.error_ratio();
540 ratio > self.replan_threshold || ratio < 1.0 / self.replan_threshold
541 })
542 }
543
544 #[must_use]
546 pub fn max_error_ratio(&self) -> f64 {
547 self.entries
548 .iter()
549 .map(|e| {
550 let r = e.error_ratio();
551 if r < 1.0 { 1.0 / r } else { r }
553 })
554 .fold(1.0_f64, f64::max)
555 }
556
557 pub fn clear(&mut self) {
559 self.entries.clear();
560 }
561}
562
563pub struct CardinalityEstimator {
565 table_stats: HashMap<String, TableStats>,
567 default_row_count: u64,
569 default_selectivity: f64,
571 avg_fanout: f64,
573 selectivity_config: SelectivityConfig,
575 rdf_statistics: Option<grafeo_core::statistics::RdfStatistics>,
577}
578
579impl CardinalityEstimator {
580 #[must_use]
582 pub fn new() -> Self {
583 let config = SelectivityConfig::new();
584 Self {
585 table_stats: HashMap::new(),
586 default_row_count: 1000,
587 default_selectivity: config.default,
588 avg_fanout: 10.0,
589 selectivity_config: config,
590 rdf_statistics: None,
591 }
592 }
593
594 #[must_use]
596 pub fn with_selectivity_config(config: SelectivityConfig) -> Self {
597 Self {
598 table_stats: HashMap::new(),
599 default_row_count: 1000,
600 default_selectivity: config.default,
601 avg_fanout: 10.0,
602 selectivity_config: config,
603 rdf_statistics: None,
604 }
605 }
606
607 #[must_use]
609 pub fn selectivity_config(&self) -> &SelectivityConfig {
610 &self.selectivity_config
611 }
612
613 #[must_use]
615 pub fn create_estimation_log() -> EstimationLog {
616 EstimationLog::new(10.0)
617 }
618
619 #[must_use]
625 pub fn from_statistics(stats: &grafeo_core::statistics::Statistics) -> Self {
626 let mut estimator = Self::new();
627
628 if stats.total_nodes > 0 {
630 estimator.default_row_count = stats.total_nodes;
631 }
632
633 for (label, label_stats) in &stats.labels {
635 let mut table_stats = TableStats::new(label_stats.node_count);
636
637 for (prop, col_stats) in &label_stats.properties {
639 let optimizer_col =
640 ColumnStats::new(col_stats.distinct_count).with_nulls(col_stats.null_count);
641 table_stats = table_stats.with_column(prop, optimizer_col);
642 }
643
644 estimator.add_table_stats(label, table_stats);
645 }
646
647 if !stats.edge_types.is_empty() {
649 let total_out_degree: f64 = stats.edge_types.values().map(|e| e.avg_out_degree).sum();
650 estimator.avg_fanout = total_out_degree / stats.edge_types.len() as f64;
651 } else if stats.total_nodes > 0 {
652 estimator.avg_fanout = stats.total_edges as f64 / stats.total_nodes as f64;
653 }
654
655 if estimator.avg_fanout < 1.0 {
657 estimator.avg_fanout = 1.0;
658 }
659
660 estimator
661 }
662
663 #[must_use]
668 pub fn from_rdf_statistics(rdf_stats: grafeo_core::statistics::RdfStatistics) -> Self {
669 let mut estimator = Self::new();
670 if rdf_stats.total_triples > 0 {
671 estimator.default_row_count = rdf_stats.total_triples;
672 }
673 estimator.rdf_statistics = Some(rdf_stats);
674 estimator
675 }
676
677 pub fn add_table_stats(&mut self, name: &str, stats: TableStats) {
679 self.table_stats.insert(name.to_string(), stats);
680 }
681
682 pub fn set_avg_fanout(&mut self, fanout: f64) {
684 self.avg_fanout = fanout;
685 }
686
687 #[must_use]
689 pub fn estimate(&self, op: &LogicalOperator) -> f64 {
690 match op {
691 LogicalOperator::NodeScan(scan) => self.estimate_node_scan(scan),
692 LogicalOperator::Filter(filter) => self.estimate_filter(filter),
693 LogicalOperator::Project(project) => self.estimate_project(project),
694 LogicalOperator::Expand(expand) => self.estimate_expand(expand),
695 LogicalOperator::Join(join) => self.estimate_join(join),
696 LogicalOperator::Aggregate(agg) => self.estimate_aggregate(agg),
697 LogicalOperator::Sort(sort) => self.estimate_sort(sort),
698 LogicalOperator::Distinct(distinct) => self.estimate_distinct(distinct),
699 LogicalOperator::Limit(limit) => self.estimate_limit(limit),
700 LogicalOperator::Skip(skip) => self.estimate_skip(skip),
701 LogicalOperator::Return(ret) => self.estimate(&ret.input),
702 LogicalOperator::Empty => 0.0,
703 LogicalOperator::VectorScan(scan) => self.estimate_vector_scan(scan),
704 LogicalOperator::VectorJoin(join) => self.estimate_vector_join(join),
705 LogicalOperator::MultiWayJoin(mwj) => self.estimate_multi_way_join(mwj),
706 LogicalOperator::LeftJoin(lj) => self.estimate_left_join(lj),
707 LogicalOperator::TripleScan(scan) => self.estimate_triple_scan(scan),
708 _ => self.default_row_count as f64,
709 }
710 }
711
712 fn estimate_node_scan(&self, scan: &NodeScanOp) -> f64 {
714 if let Some(label) = &scan.label
715 && let Some(stats) = self.table_stats.get(label)
716 {
717 return stats.row_count as f64;
718 }
719 self.default_row_count as f64
721 }
722
723 fn estimate_triple_scan(&self, scan: &TripleScanOp) -> f64 {
729 let base = if let Some(ref input) = scan.input {
732 self.estimate(input)
733 } else {
734 1.0
735 };
736
737 let Some(rdf_stats) = &self.rdf_statistics else {
738 return if scan.input.is_some() {
739 base * self.default_row_count as f64
740 } else {
741 self.default_row_count as f64
742 };
743 };
744
745 let subject_bound = matches!(
746 scan.subject,
747 TripleComponent::Iri(_)
748 | TripleComponent::Literal(_)
749 | TripleComponent::LangLiteral { .. }
750 );
751 let object_bound = matches!(
752 scan.object,
753 TripleComponent::Iri(_)
754 | TripleComponent::Literal(_)
755 | TripleComponent::LangLiteral { .. }
756 );
757 let predicate_iri = match &scan.predicate {
758 TripleComponent::Iri(iri) => Some(iri.as_str()),
759 _ => None,
760 };
761
762 let pattern_card = rdf_stats.estimate_triple_pattern_cardinality(
763 subject_bound,
764 predicate_iri,
765 object_bound,
766 );
767
768 if scan.input.is_some() {
769 let selectivity = if rdf_stats.total_triples > 0 {
771 pattern_card / rdf_stats.total_triples as f64
772 } else {
773 1.0
774 };
775 (base * pattern_card * selectivity).max(1.0)
776 } else {
777 pattern_card.max(1.0)
778 }
779 }
780
781 fn estimate_filter(&self, filter: &FilterOp) -> f64 {
783 let input_cardinality = self.estimate(&filter.input);
784 let selectivity = self.estimate_selectivity(&filter.predicate);
785 (input_cardinality * selectivity).max(1.0)
786 }
787
788 fn estimate_project(&self, project: &ProjectOp) -> f64 {
790 self.estimate(&project.input)
791 }
792
793 fn estimate_expand(&self, expand: &ExpandOp) -> f64 {
795 let input_cardinality = self.estimate(&expand.input);
796
797 let fanout = if !expand.edge_types.is_empty() {
799 self.avg_fanout * 0.5
801 } else {
802 self.avg_fanout
803 };
804
805 let path_multiplier = if expand.max_hops.unwrap_or(1) > 1 {
807 let min = expand.min_hops as f64;
808 let max = expand.max_hops.unwrap_or(expand.min_hops + 3) as f64;
809 (fanout.powf(max + 1.0) - fanout.powf(min)) / (fanout - 1.0)
811 } else {
812 fanout
813 };
814
815 (input_cardinality * path_multiplier).max(1.0)
816 }
817
818 fn estimate_join(&self, join: &JoinOp) -> f64 {
820 let left_card = self.estimate(&join.left);
821 let right_card = self.estimate(&join.right);
822
823 match join.join_type {
824 JoinType::Cross => left_card * right_card,
825 JoinType::Inner => {
826 let selectivity = if join.conditions.is_empty() {
828 1.0 } else {
830 #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
833 let exp = join.conditions.len() as i32;
834 0.1_f64.powi(exp)
835 };
836 (left_card * right_card * selectivity).max(1.0)
837 }
838 JoinType::Left => {
839 let inner_card = self.estimate_join(&JoinOp {
841 left: join.left.clone(),
842 right: join.right.clone(),
843 join_type: JoinType::Inner,
844 conditions: join.conditions.clone(),
845 });
846 inner_card.max(left_card)
847 }
848 JoinType::Right => {
849 let inner_card = self.estimate_join(&JoinOp {
851 left: join.left.clone(),
852 right: join.right.clone(),
853 join_type: JoinType::Inner,
854 conditions: join.conditions.clone(),
855 });
856 inner_card.max(right_card)
857 }
858 JoinType::Full => {
859 let inner_card = self.estimate_join(&JoinOp {
861 left: join.left.clone(),
862 right: join.right.clone(),
863 join_type: JoinType::Inner,
864 conditions: join.conditions.clone(),
865 });
866 inner_card.max(left_card.max(right_card))
867 }
868 JoinType::Semi => {
869 (left_card * self.default_selectivity).max(1.0)
871 }
872 JoinType::Anti => {
873 (left_card * (1.0 - self.default_selectivity)).max(1.0)
875 }
876 }
877 }
878
879 fn estimate_left_join(&self, lj: &LeftJoinOp) -> f64 {
888 let left_card = self.estimate(&lj.left);
889 let right_card = self.estimate(&lj.right);
890
891 let condition_selectivity = if let Some(cond) = &lj.condition {
894 let n = count_and_conjuncts(cond);
895 #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
897 let exp = n as i32;
898 self.default_selectivity.powi(exp)
899 } else {
900 self.default_selectivity
901 };
902
903 let inner_estimate = left_card * right_card * condition_selectivity;
905 inner_estimate.max(left_card).max(1.0)
906 }
907
908 fn estimate_aggregate(&self, agg: &AggregateOp) -> f64 {
910 let input_cardinality = self.estimate(&agg.input);
911
912 if agg.group_by.is_empty() {
913 1.0
915 } else {
916 #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
920 let exp = agg.group_by.len() as i32;
921 let group_reduction = 10.0_f64.powi(exp);
922 (input_cardinality / group_reduction).max(1.0)
923 }
924 }
925
926 fn estimate_sort(&self, sort: &SortOp) -> f64 {
928 self.estimate(&sort.input)
929 }
930
931 fn estimate_distinct(&self, distinct: &DistinctOp) -> f64 {
933 let input_cardinality = self.estimate(&distinct.input);
934 (input_cardinality * self.selectivity_config.distinct_fraction).max(1.0)
935 }
936
937 fn estimate_limit(&self, limit: &LimitOp) -> f64 {
939 let input_cardinality = self.estimate(&limit.input);
940 limit.count.estimate().min(input_cardinality)
941 }
942
943 fn estimate_skip(&self, skip: &SkipOp) -> f64 {
945 let input_cardinality = self.estimate(&skip.input);
946 (input_cardinality - skip.count.estimate()).max(0.0)
947 }
948
949 fn estimate_vector_scan(&self, scan: &VectorScanOp) -> f64 {
954 let base_k = scan.k as f64;
955
956 let selectivity = if scan.min_similarity.is_some() || scan.max_distance.is_some() {
958 0.7
960 } else {
961 1.0
962 };
963
964 (base_k * selectivity).max(1.0)
965 }
966
967 fn estimate_vector_join(&self, join: &VectorJoinOp) -> f64 {
971 let input_cardinality = self.estimate(&join.input);
972 let k = join.k as f64;
973
974 let selectivity = if join.min_similarity.is_some() || join.max_distance.is_some() {
976 0.7
977 } else {
978 1.0
979 };
980
981 (input_cardinality * k * selectivity).max(1.0)
982 }
983
984 fn estimate_multi_way_join(&self, mwj: &MultiWayJoinOp) -> f64 {
989 if mwj.inputs.is_empty() {
990 return 0.0;
991 }
992 let cardinalities: Vec<f64> = mwj
993 .inputs
994 .iter()
995 .map(|input| self.estimate(input))
996 .collect();
997 let min_card = cardinalities.iter().copied().fold(f64::INFINITY, f64::min);
998 let n = cardinalities.len() as f64;
999 (min_card.powf(n / 2.0)).max(1.0)
1001 }
1002
1003 fn estimate_selectivity(&self, expr: &LogicalExpression) -> f64 {
1005 match expr {
1006 LogicalExpression::Binary { left, op, right } => {
1007 self.estimate_binary_selectivity(left, *op, right)
1008 }
1009 LogicalExpression::Unary { op, operand } => {
1010 self.estimate_unary_selectivity(*op, operand)
1011 }
1012 LogicalExpression::Literal(value) => {
1013 if let grafeo_common::types::Value::Bool(b) = value {
1015 if *b { 1.0 } else { 0.0 }
1016 } else {
1017 self.default_selectivity
1018 }
1019 }
1020 _ => self.default_selectivity,
1021 }
1022 }
1023
1024 fn estimate_binary_selectivity(
1026 &self,
1027 left: &LogicalExpression,
1028 op: BinaryOp,
1029 right: &LogicalExpression,
1030 ) -> f64 {
1031 match op {
1032 BinaryOp::Eq => {
1034 if let Some(selectivity) = self.try_equality_selectivity(left, right) {
1035 return selectivity;
1036 }
1037 self.selectivity_config.equality
1038 }
1039 BinaryOp::Ne => self.selectivity_config.inequality,
1041 BinaryOp::Lt | BinaryOp::Le | BinaryOp::Gt | BinaryOp::Ge => {
1043 if let Some(selectivity) = self.try_range_selectivity(left, op, right) {
1044 return selectivity;
1045 }
1046 self.selectivity_config.range
1047 }
1048 BinaryOp::And => {
1050 let left_sel = self.estimate_selectivity(left);
1051 let right_sel = self.estimate_selectivity(right);
1052 left_sel * right_sel
1054 }
1055 BinaryOp::Or => {
1056 let left_sel = self.estimate_selectivity(left);
1057 let right_sel = self.estimate_selectivity(right);
1058 (left_sel + right_sel - left_sel * right_sel).min(1.0)
1061 }
1062 BinaryOp::StartsWith | BinaryOp::EndsWith | BinaryOp::Contains | BinaryOp::Like => {
1064 self.selectivity_config.string_ops
1065 }
1066 BinaryOp::In => self.selectivity_config.membership,
1068 _ => self.default_selectivity,
1070 }
1071 }
1072
1073 fn try_equality_selectivity(
1075 &self,
1076 left: &LogicalExpression,
1077 right: &LogicalExpression,
1078 ) -> Option<f64> {
1079 let (label, column, value) = self.extract_column_and_value(left, right)?;
1081
1082 let stats = self.get_column_stats(&label, &column)?;
1084
1085 if let Some(ref histogram) = stats.histogram {
1087 return Some(histogram.equality_selectivity(value));
1088 }
1089
1090 if stats.distinct_count > 0 {
1092 return Some(1.0 / stats.distinct_count as f64);
1093 }
1094
1095 None
1096 }
1097
1098 fn try_range_selectivity(
1100 &self,
1101 left: &LogicalExpression,
1102 op: BinaryOp,
1103 right: &LogicalExpression,
1104 ) -> Option<f64> {
1105 let (label, column, value) = self.extract_column_and_value(left, right)?;
1107
1108 let stats = self.get_column_stats(&label, &column)?;
1110
1111 let (lower, upper) = match op {
1113 BinaryOp::Lt => (None, Some(value)),
1114 BinaryOp::Le => (None, Some(value + f64::EPSILON)),
1115 BinaryOp::Gt => (Some(value + f64::EPSILON), None),
1116 BinaryOp::Ge => (Some(value), None),
1117 _ => return None,
1118 };
1119
1120 if let Some(ref histogram) = stats.histogram {
1122 return Some(histogram.range_selectivity(lower, upper));
1123 }
1124
1125 if let (Some(min), Some(max)) = (stats.min_value, stats.max_value) {
1127 let range = max - min;
1128 if range <= 0.0 {
1129 return Some(1.0);
1130 }
1131
1132 let effective_lower = lower.unwrap_or(min).max(min);
1133 let effective_upper = upper.unwrap_or(max).min(max);
1134 let overlap = (effective_upper - effective_lower).max(0.0);
1135 return Some((overlap / range).clamp(0.0, 1.0));
1136 }
1137
1138 None
1139 }
1140
1141 fn extract_column_and_value(
1146 &self,
1147 left: &LogicalExpression,
1148 right: &LogicalExpression,
1149 ) -> Option<(String, String, f64)> {
1150 if let Some(result) = self.try_extract_property_literal(left, right) {
1152 return Some(result);
1153 }
1154
1155 self.try_extract_property_literal(right, left)
1157 }
1158
1159 fn try_extract_property_literal(
1161 &self,
1162 property_expr: &LogicalExpression,
1163 literal_expr: &LogicalExpression,
1164 ) -> Option<(String, String, f64)> {
1165 let (variable, property) = match property_expr {
1167 LogicalExpression::Property { variable, property } => {
1168 (variable.clone(), property.clone())
1169 }
1170 _ => return None,
1171 };
1172
1173 let value = match literal_expr {
1175 LogicalExpression::Literal(grafeo_common::types::Value::Int64(n)) => *n as f64,
1176 LogicalExpression::Literal(grafeo_common::types::Value::Float64(f)) => *f,
1177 _ => return None,
1178 };
1179
1180 for label in self.table_stats.keys() {
1184 if let Some(stats) = self.table_stats.get(label)
1185 && stats.columns.contains_key(&property)
1186 {
1187 return Some((label.clone(), property, value));
1188 }
1189 }
1190
1191 Some((variable, property, value))
1193 }
1194
1195 fn estimate_unary_selectivity(&self, op: UnaryOp, _operand: &LogicalExpression) -> f64 {
1197 match op {
1198 UnaryOp::Not => 1.0 - self.default_selectivity,
1199 UnaryOp::IsNull => self.selectivity_config.is_null,
1200 UnaryOp::IsNotNull => self.selectivity_config.is_not_null,
1201 UnaryOp::Neg => 1.0, }
1203 }
1204
1205 fn get_column_stats(&self, label: &str, column: &str) -> Option<&ColumnStats> {
1207 self.table_stats.get(label)?.columns.get(column)
1208 }
1209}
1210
1211impl Default for CardinalityEstimator {
1212 fn default() -> Self {
1213 Self::new()
1214 }
1215}
1216
1217#[cfg(test)]
1218mod tests {
1219 use super::*;
1220 use crate::query::plan::{
1221 DistinctOp, ExpandDirection, ExpandOp, FilterOp, JoinCondition, NodeScanOp, PathMode,
1222 ProjectOp, Projection, ReturnItem, ReturnOp, SkipOp, SortKey, SortOp, SortOrder,
1223 };
1224 use grafeo_common::types::Value;
1225
1226 #[test]
1227 fn test_node_scan_with_stats() {
1228 let mut estimator = CardinalityEstimator::new();
1229 estimator.add_table_stats("Person", TableStats::new(5000));
1230
1231 let scan = LogicalOperator::NodeScan(NodeScanOp {
1232 variable: "n".to_string(),
1233 label: Some("Person".to_string()),
1234 input: None,
1235 });
1236
1237 let cardinality = estimator.estimate(&scan);
1238 assert!((cardinality - 5000.0).abs() < 0.001);
1239 }
1240
1241 #[test]
1242 fn test_filter_reduces_cardinality() {
1243 let mut estimator = CardinalityEstimator::new();
1244 estimator.add_table_stats("Person", TableStats::new(1000));
1245
1246 let filter = LogicalOperator::Filter(FilterOp {
1247 predicate: LogicalExpression::Binary {
1248 left: Box::new(LogicalExpression::Property {
1249 variable: "n".to_string(),
1250 property: "age".to_string(),
1251 }),
1252 op: BinaryOp::Eq,
1253 right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
1254 },
1255 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1256 variable: "n".to_string(),
1257 label: Some("Person".to_string()),
1258 input: None,
1259 })),
1260 pushdown_hint: None,
1261 });
1262
1263 let cardinality = estimator.estimate(&filter);
1264 assert!(cardinality < 1000.0);
1266 assert!(cardinality >= 1.0);
1267 }
1268
1269 #[test]
1270 fn test_join_cardinality() {
1271 let mut estimator = CardinalityEstimator::new();
1272 estimator.add_table_stats("Person", TableStats::new(1000));
1273 estimator.add_table_stats("Company", TableStats::new(100));
1274
1275 let join = LogicalOperator::Join(JoinOp {
1276 left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1277 variable: "p".to_string(),
1278 label: Some("Person".to_string()),
1279 input: None,
1280 })),
1281 right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1282 variable: "c".to_string(),
1283 label: Some("Company".to_string()),
1284 input: None,
1285 })),
1286 join_type: JoinType::Inner,
1287 conditions: vec![JoinCondition {
1288 left: LogicalExpression::Property {
1289 variable: "p".to_string(),
1290 property: "company_id".to_string(),
1291 },
1292 right: LogicalExpression::Property {
1293 variable: "c".to_string(),
1294 property: "id".to_string(),
1295 },
1296 }],
1297 });
1298
1299 let cardinality = estimator.estimate(&join);
1300 assert!(cardinality < 1000.0 * 100.0);
1302 }
1303
1304 #[test]
1305 fn test_limit_caps_cardinality() {
1306 let mut estimator = CardinalityEstimator::new();
1307 estimator.add_table_stats("Person", TableStats::new(1000));
1308
1309 let limit = LogicalOperator::Limit(LimitOp {
1310 count: 10.into(),
1311 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1312 variable: "n".to_string(),
1313 label: Some("Person".to_string()),
1314 input: None,
1315 })),
1316 });
1317
1318 let cardinality = estimator.estimate(&limit);
1319 assert!((cardinality - 10.0).abs() < 0.001);
1320 }
1321
1322 #[test]
1323 fn test_aggregate_reduces_cardinality() {
1324 let mut estimator = CardinalityEstimator::new();
1325 estimator.add_table_stats("Person", TableStats::new(1000));
1326
1327 let global_agg = LogicalOperator::Aggregate(AggregateOp {
1329 group_by: vec![],
1330 aggregates: vec![],
1331 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1332 variable: "n".to_string(),
1333 label: Some("Person".to_string()),
1334 input: None,
1335 })),
1336 having: None,
1337 });
1338
1339 let cardinality = estimator.estimate(&global_agg);
1340 assert!((cardinality - 1.0).abs() < 0.001);
1341
1342 let group_agg = LogicalOperator::Aggregate(AggregateOp {
1344 group_by: vec![LogicalExpression::Property {
1345 variable: "n".to_string(),
1346 property: "city".to_string(),
1347 }],
1348 aggregates: vec![],
1349 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1350 variable: "n".to_string(),
1351 label: Some("Person".to_string()),
1352 input: None,
1353 })),
1354 having: None,
1355 });
1356
1357 let cardinality = estimator.estimate(&group_agg);
1358 assert!(cardinality < 1000.0);
1360 }
1361
1362 #[test]
1363 fn test_node_scan_without_stats() {
1364 let estimator = CardinalityEstimator::new();
1365
1366 let scan = LogicalOperator::NodeScan(NodeScanOp {
1367 variable: "n".to_string(),
1368 label: Some("Unknown".to_string()),
1369 input: None,
1370 });
1371
1372 let cardinality = estimator.estimate(&scan);
1373 assert!((cardinality - 1000.0).abs() < 0.001);
1375 }
1376
1377 #[test]
1378 fn test_node_scan_no_label() {
1379 let estimator = CardinalityEstimator::new();
1380
1381 let scan = LogicalOperator::NodeScan(NodeScanOp {
1382 variable: "n".to_string(),
1383 label: None,
1384 input: None,
1385 });
1386
1387 let cardinality = estimator.estimate(&scan);
1388 assert!((cardinality - 1000.0).abs() < 0.001);
1390 }
1391
1392 #[test]
1393 fn test_filter_inequality_selectivity() {
1394 let mut estimator = CardinalityEstimator::new();
1395 estimator.add_table_stats("Person", TableStats::new(1000));
1396
1397 let filter = LogicalOperator::Filter(FilterOp {
1398 predicate: LogicalExpression::Binary {
1399 left: Box::new(LogicalExpression::Property {
1400 variable: "n".to_string(),
1401 property: "age".to_string(),
1402 }),
1403 op: BinaryOp::Ne,
1404 right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
1405 },
1406 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1407 variable: "n".to_string(),
1408 label: Some("Person".to_string()),
1409 input: None,
1410 })),
1411 pushdown_hint: None,
1412 });
1413
1414 let cardinality = estimator.estimate(&filter);
1415 assert!(cardinality > 900.0);
1417 }
1418
1419 #[test]
1420 fn test_filter_range_selectivity() {
1421 let mut estimator = CardinalityEstimator::new();
1422 estimator.add_table_stats("Person", TableStats::new(1000));
1423
1424 let filter = LogicalOperator::Filter(FilterOp {
1425 predicate: LogicalExpression::Binary {
1426 left: Box::new(LogicalExpression::Property {
1427 variable: "n".to_string(),
1428 property: "age".to_string(),
1429 }),
1430 op: BinaryOp::Gt,
1431 right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
1432 },
1433 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1434 variable: "n".to_string(),
1435 label: Some("Person".to_string()),
1436 input: None,
1437 })),
1438 pushdown_hint: None,
1439 });
1440
1441 let cardinality = estimator.estimate(&filter);
1442 assert!(cardinality < 500.0);
1444 assert!(cardinality > 100.0);
1445 }
1446
1447 #[test]
1448 fn test_filter_and_selectivity() {
1449 let mut estimator = CardinalityEstimator::new();
1450 estimator.add_table_stats("Person", TableStats::new(1000));
1451
1452 let filter = LogicalOperator::Filter(FilterOp {
1455 predicate: LogicalExpression::Binary {
1456 left: Box::new(LogicalExpression::Binary {
1457 left: Box::new(LogicalExpression::Property {
1458 variable: "n".to_string(),
1459 property: "city".to_string(),
1460 }),
1461 op: BinaryOp::Eq,
1462 right: Box::new(LogicalExpression::Literal(Value::String("NYC".into()))),
1463 }),
1464 op: BinaryOp::And,
1465 right: Box::new(LogicalExpression::Binary {
1466 left: Box::new(LogicalExpression::Property {
1467 variable: "n".to_string(),
1468 property: "age".to_string(),
1469 }),
1470 op: BinaryOp::Eq,
1471 right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
1472 }),
1473 },
1474 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1475 variable: "n".to_string(),
1476 label: Some("Person".to_string()),
1477 input: None,
1478 })),
1479 pushdown_hint: None,
1480 });
1481
1482 let cardinality = estimator.estimate(&filter);
1483 assert!(cardinality < 100.0);
1486 assert!(cardinality >= 1.0);
1487 }
1488
1489 #[test]
1490 fn test_filter_or_selectivity() {
1491 let mut estimator = CardinalityEstimator::new();
1492 estimator.add_table_stats("Person", TableStats::new(1000));
1493
1494 let filter = LogicalOperator::Filter(FilterOp {
1498 predicate: LogicalExpression::Binary {
1499 left: Box::new(LogicalExpression::Binary {
1500 left: Box::new(LogicalExpression::Property {
1501 variable: "n".to_string(),
1502 property: "city".to_string(),
1503 }),
1504 op: BinaryOp::Eq,
1505 right: Box::new(LogicalExpression::Literal(Value::String("NYC".into()))),
1506 }),
1507 op: BinaryOp::Or,
1508 right: Box::new(LogicalExpression::Binary {
1509 left: Box::new(LogicalExpression::Property {
1510 variable: "n".to_string(),
1511 property: "city".to_string(),
1512 }),
1513 op: BinaryOp::Eq,
1514 right: Box::new(LogicalExpression::Literal(Value::String("LA".into()))),
1515 }),
1516 },
1517 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1518 variable: "n".to_string(),
1519 label: Some("Person".to_string()),
1520 input: None,
1521 })),
1522 pushdown_hint: None,
1523 });
1524
1525 let cardinality = estimator.estimate(&filter);
1526 assert!(cardinality < 100.0);
1528 assert!(cardinality >= 1.0);
1529 }
1530
1531 #[test]
1532 fn test_filter_literal_true() {
1533 let mut estimator = CardinalityEstimator::new();
1534 estimator.add_table_stats("Person", TableStats::new(1000));
1535
1536 let filter = LogicalOperator::Filter(FilterOp {
1537 predicate: LogicalExpression::Literal(Value::Bool(true)),
1538 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1539 variable: "n".to_string(),
1540 label: Some("Person".to_string()),
1541 input: None,
1542 })),
1543 pushdown_hint: None,
1544 });
1545
1546 let cardinality = estimator.estimate(&filter);
1547 assert!((cardinality - 1000.0).abs() < 0.001);
1549 }
1550
1551 #[test]
1552 fn test_filter_literal_false() {
1553 let mut estimator = CardinalityEstimator::new();
1554 estimator.add_table_stats("Person", TableStats::new(1000));
1555
1556 let filter = LogicalOperator::Filter(FilterOp {
1557 predicate: LogicalExpression::Literal(Value::Bool(false)),
1558 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1559 variable: "n".to_string(),
1560 label: Some("Person".to_string()),
1561 input: None,
1562 })),
1563 pushdown_hint: None,
1564 });
1565
1566 let cardinality = estimator.estimate(&filter);
1567 assert!((cardinality - 1.0).abs() < 0.001);
1569 }
1570
1571 #[test]
1572 fn test_unary_not_selectivity() {
1573 let mut estimator = CardinalityEstimator::new();
1574 estimator.add_table_stats("Person", TableStats::new(1000));
1575
1576 let filter = LogicalOperator::Filter(FilterOp {
1577 predicate: LogicalExpression::Unary {
1578 op: UnaryOp::Not,
1579 operand: Box::new(LogicalExpression::Literal(Value::Bool(true))),
1580 },
1581 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1582 variable: "n".to_string(),
1583 label: Some("Person".to_string()),
1584 input: None,
1585 })),
1586 pushdown_hint: None,
1587 });
1588
1589 let cardinality = estimator.estimate(&filter);
1590 assert!(cardinality < 1000.0);
1592 }
1593
1594 #[test]
1595 fn test_unary_is_null_selectivity() {
1596 let mut estimator = CardinalityEstimator::new();
1597 estimator.add_table_stats("Person", TableStats::new(1000));
1598
1599 let filter = LogicalOperator::Filter(FilterOp {
1600 predicate: LogicalExpression::Unary {
1601 op: UnaryOp::IsNull,
1602 operand: Box::new(LogicalExpression::Variable("x".to_string())),
1603 },
1604 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1605 variable: "n".to_string(),
1606 label: Some("Person".to_string()),
1607 input: None,
1608 })),
1609 pushdown_hint: None,
1610 });
1611
1612 let cardinality = estimator.estimate(&filter);
1613 assert!(cardinality < 100.0);
1615 }
1616
1617 #[test]
1618 fn test_expand_cardinality() {
1619 let mut estimator = CardinalityEstimator::new();
1620 estimator.add_table_stats("Person", TableStats::new(100));
1621
1622 let expand = LogicalOperator::Expand(ExpandOp {
1623 from_variable: "a".to_string(),
1624 to_variable: "b".to_string(),
1625 edge_variable: None,
1626 direction: ExpandDirection::Outgoing,
1627 edge_types: vec![],
1628 min_hops: 1,
1629 max_hops: Some(1),
1630 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1631 variable: "a".to_string(),
1632 label: Some("Person".to_string()),
1633 input: None,
1634 })),
1635 path_alias: None,
1636 path_mode: PathMode::Walk,
1637 });
1638
1639 let cardinality = estimator.estimate(&expand);
1640 assert!(cardinality > 100.0);
1642 }
1643
1644 #[test]
1645 fn test_expand_with_edge_type_filter() {
1646 let mut estimator = CardinalityEstimator::new();
1647 estimator.add_table_stats("Person", TableStats::new(100));
1648
1649 let expand = LogicalOperator::Expand(ExpandOp {
1650 from_variable: "a".to_string(),
1651 to_variable: "b".to_string(),
1652 edge_variable: None,
1653 direction: ExpandDirection::Outgoing,
1654 edge_types: vec!["KNOWS".to_string()],
1655 min_hops: 1,
1656 max_hops: Some(1),
1657 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1658 variable: "a".to_string(),
1659 label: Some("Person".to_string()),
1660 input: None,
1661 })),
1662 path_alias: None,
1663 path_mode: PathMode::Walk,
1664 });
1665
1666 let cardinality = estimator.estimate(&expand);
1667 assert!(cardinality > 100.0);
1669 }
1670
1671 #[test]
1672 fn test_expand_variable_length() {
1673 let mut estimator = CardinalityEstimator::new();
1674 estimator.add_table_stats("Person", TableStats::new(100));
1675
1676 let expand = LogicalOperator::Expand(ExpandOp {
1677 from_variable: "a".to_string(),
1678 to_variable: "b".to_string(),
1679 edge_variable: None,
1680 direction: ExpandDirection::Outgoing,
1681 edge_types: vec![],
1682 min_hops: 1,
1683 max_hops: Some(3),
1684 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1685 variable: "a".to_string(),
1686 label: Some("Person".to_string()),
1687 input: None,
1688 })),
1689 path_alias: None,
1690 path_mode: PathMode::Walk,
1691 });
1692
1693 let cardinality = estimator.estimate(&expand);
1694 assert!(cardinality > 500.0);
1696 }
1697
1698 #[test]
1699 fn test_join_cross_product() {
1700 let mut estimator = CardinalityEstimator::new();
1701 estimator.add_table_stats("Person", TableStats::new(100));
1702 estimator.add_table_stats("Company", TableStats::new(50));
1703
1704 let join = LogicalOperator::Join(JoinOp {
1705 left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1706 variable: "p".to_string(),
1707 label: Some("Person".to_string()),
1708 input: None,
1709 })),
1710 right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1711 variable: "c".to_string(),
1712 label: Some("Company".to_string()),
1713 input: None,
1714 })),
1715 join_type: JoinType::Cross,
1716 conditions: vec![],
1717 });
1718
1719 let cardinality = estimator.estimate(&join);
1720 assert!((cardinality - 5000.0).abs() < 0.001);
1722 }
1723
1724 #[test]
1725 fn test_join_left_outer() {
1726 let mut estimator = CardinalityEstimator::new();
1727 estimator.add_table_stats("Person", TableStats::new(1000));
1728 estimator.add_table_stats("Company", TableStats::new(10));
1729
1730 let join = LogicalOperator::Join(JoinOp {
1731 left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1732 variable: "p".to_string(),
1733 label: Some("Person".to_string()),
1734 input: None,
1735 })),
1736 right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1737 variable: "c".to_string(),
1738 label: Some("Company".to_string()),
1739 input: None,
1740 })),
1741 join_type: JoinType::Left,
1742 conditions: vec![JoinCondition {
1743 left: LogicalExpression::Variable("p".to_string()),
1744 right: LogicalExpression::Variable("c".to_string()),
1745 }],
1746 });
1747
1748 let cardinality = estimator.estimate(&join);
1749 assert!(cardinality >= 1000.0);
1751 }
1752
1753 #[test]
1754 fn test_join_semi() {
1755 let mut estimator = CardinalityEstimator::new();
1756 estimator.add_table_stats("Person", TableStats::new(1000));
1757 estimator.add_table_stats("Company", TableStats::new(100));
1758
1759 let join = LogicalOperator::Join(JoinOp {
1760 left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1761 variable: "p".to_string(),
1762 label: Some("Person".to_string()),
1763 input: None,
1764 })),
1765 right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1766 variable: "c".to_string(),
1767 label: Some("Company".to_string()),
1768 input: None,
1769 })),
1770 join_type: JoinType::Semi,
1771 conditions: vec![],
1772 });
1773
1774 let cardinality = estimator.estimate(&join);
1775 assert!(cardinality <= 1000.0);
1777 }
1778
1779 #[test]
1780 fn test_join_anti() {
1781 let mut estimator = CardinalityEstimator::new();
1782 estimator.add_table_stats("Person", TableStats::new(1000));
1783 estimator.add_table_stats("Company", TableStats::new(100));
1784
1785 let join = LogicalOperator::Join(JoinOp {
1786 left: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1787 variable: "p".to_string(),
1788 label: Some("Person".to_string()),
1789 input: None,
1790 })),
1791 right: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1792 variable: "c".to_string(),
1793 label: Some("Company".to_string()),
1794 input: None,
1795 })),
1796 join_type: JoinType::Anti,
1797 conditions: vec![],
1798 });
1799
1800 let cardinality = estimator.estimate(&join);
1801 assert!(cardinality <= 1000.0);
1803 assert!(cardinality >= 1.0);
1804 }
1805
1806 #[test]
1807 fn test_project_preserves_cardinality() {
1808 let mut estimator = CardinalityEstimator::new();
1809 estimator.add_table_stats("Person", TableStats::new(1000));
1810
1811 let project = LogicalOperator::Project(ProjectOp {
1812 projections: vec![Projection {
1813 expression: LogicalExpression::Variable("n".to_string()),
1814 alias: None,
1815 }],
1816 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1817 variable: "n".to_string(),
1818 label: Some("Person".to_string()),
1819 input: None,
1820 })),
1821 pass_through_input: false,
1822 });
1823
1824 let cardinality = estimator.estimate(&project);
1825 assert!((cardinality - 1000.0).abs() < 0.001);
1826 }
1827
1828 #[test]
1829 fn test_sort_preserves_cardinality() {
1830 let mut estimator = CardinalityEstimator::new();
1831 estimator.add_table_stats("Person", TableStats::new(1000));
1832
1833 let sort = LogicalOperator::Sort(SortOp {
1834 keys: vec![SortKey {
1835 expression: LogicalExpression::Variable("n".to_string()),
1836 order: SortOrder::Ascending,
1837 nulls: None,
1838 }],
1839 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1840 variable: "n".to_string(),
1841 label: Some("Person".to_string()),
1842 input: None,
1843 })),
1844 });
1845
1846 let cardinality = estimator.estimate(&sort);
1847 assert!((cardinality - 1000.0).abs() < 0.001);
1848 }
1849
1850 #[test]
1851 fn test_distinct_reduces_cardinality() {
1852 let mut estimator = CardinalityEstimator::new();
1853 estimator.add_table_stats("Person", TableStats::new(1000));
1854
1855 let distinct = LogicalOperator::Distinct(DistinctOp {
1856 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1857 variable: "n".to_string(),
1858 label: Some("Person".to_string()),
1859 input: None,
1860 })),
1861 columns: None,
1862 });
1863
1864 let cardinality = estimator.estimate(&distinct);
1865 assert!((cardinality - 500.0).abs() < 0.001);
1867 }
1868
1869 #[test]
1870 fn test_skip_reduces_cardinality() {
1871 let mut estimator = CardinalityEstimator::new();
1872 estimator.add_table_stats("Person", TableStats::new(1000));
1873
1874 let skip = LogicalOperator::Skip(SkipOp {
1875 count: 100.into(),
1876 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1877 variable: "n".to_string(),
1878 label: Some("Person".to_string()),
1879 input: None,
1880 })),
1881 });
1882
1883 let cardinality = estimator.estimate(&skip);
1884 assert!((cardinality - 900.0).abs() < 0.001);
1885 }
1886
1887 #[test]
1888 fn test_return_preserves_cardinality() {
1889 let mut estimator = CardinalityEstimator::new();
1890 estimator.add_table_stats("Person", TableStats::new(1000));
1891
1892 let ret = LogicalOperator::Return(ReturnOp {
1893 items: vec![ReturnItem {
1894 expression: LogicalExpression::Variable("n".to_string()),
1895 alias: None,
1896 }],
1897 distinct: false,
1898 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1899 variable: "n".to_string(),
1900 label: Some("Person".to_string()),
1901 input: None,
1902 })),
1903 });
1904
1905 let cardinality = estimator.estimate(&ret);
1906 assert!((cardinality - 1000.0).abs() < 0.001);
1907 }
1908
1909 #[test]
1910 fn test_empty_cardinality() {
1911 let estimator = CardinalityEstimator::new();
1912 let cardinality = estimator.estimate(&LogicalOperator::Empty);
1913 assert!((cardinality).abs() < 0.001);
1914 }
1915
1916 #[test]
1917 fn test_table_stats_with_column() {
1918 let stats = TableStats::new(1000).with_column(
1919 "age",
1920 ColumnStats::new(50).with_nulls(10).with_range(0.0, 100.0),
1921 );
1922
1923 assert_eq!(stats.row_count, 1000);
1924 let col = stats.columns.get("age").unwrap();
1925 assert_eq!(col.distinct_count, 50);
1926 assert_eq!(col.null_count, 10);
1927 assert!((col.min_value.unwrap() - 0.0).abs() < 0.001);
1928 assert!((col.max_value.unwrap() - 100.0).abs() < 0.001);
1929 }
1930
1931 #[test]
1932 fn test_estimator_default() {
1933 let estimator = CardinalityEstimator::default();
1934 let scan = LogicalOperator::NodeScan(NodeScanOp {
1935 variable: "n".to_string(),
1936 label: None,
1937 input: None,
1938 });
1939 let cardinality = estimator.estimate(&scan);
1940 assert!((cardinality - 1000.0).abs() < 0.001);
1941 }
1942
1943 #[test]
1944 fn test_set_avg_fanout() {
1945 let mut estimator = CardinalityEstimator::new();
1946 estimator.add_table_stats("Person", TableStats::new(100));
1947 estimator.set_avg_fanout(5.0);
1948
1949 let expand = LogicalOperator::Expand(ExpandOp {
1950 from_variable: "a".to_string(),
1951 to_variable: "b".to_string(),
1952 edge_variable: None,
1953 direction: ExpandDirection::Outgoing,
1954 edge_types: vec![],
1955 min_hops: 1,
1956 max_hops: Some(1),
1957 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1958 variable: "a".to_string(),
1959 label: Some("Person".to_string()),
1960 input: None,
1961 })),
1962 path_alias: None,
1963 path_mode: PathMode::Walk,
1964 });
1965
1966 let cardinality = estimator.estimate(&expand);
1967 assert!((cardinality - 500.0).abs() < 0.001);
1969 }
1970
1971 #[test]
1972 fn test_multiple_group_by_keys_reduce_cardinality() {
1973 let mut estimator = CardinalityEstimator::new();
1977 estimator.add_table_stats("Person", TableStats::new(10000));
1978
1979 let single_group = LogicalOperator::Aggregate(AggregateOp {
1980 group_by: vec![LogicalExpression::Property {
1981 variable: "n".to_string(),
1982 property: "city".to_string(),
1983 }],
1984 aggregates: vec![],
1985 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
1986 variable: "n".to_string(),
1987 label: Some("Person".to_string()),
1988 input: None,
1989 })),
1990 having: None,
1991 });
1992
1993 let multi_group = LogicalOperator::Aggregate(AggregateOp {
1994 group_by: vec![
1995 LogicalExpression::Property {
1996 variable: "n".to_string(),
1997 property: "city".to_string(),
1998 },
1999 LogicalExpression::Property {
2000 variable: "n".to_string(),
2001 property: "country".to_string(),
2002 },
2003 ],
2004 aggregates: vec![],
2005 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2006 variable: "n".to_string(),
2007 label: Some("Person".to_string()),
2008 input: None,
2009 })),
2010 having: None,
2011 });
2012
2013 let single_card = estimator.estimate(&single_group);
2014 let multi_card = estimator.estimate(&multi_group);
2015
2016 assert!(single_card < 10000.0);
2018 assert!(multi_card < 10000.0);
2019 assert!(single_card >= 1.0);
2021 assert!(multi_card >= 1.0);
2022 }
2023
2024 #[test]
2027 fn test_histogram_build_uniform() {
2028 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2030 let histogram = EquiDepthHistogram::build(&values, 10);
2031
2032 assert_eq!(histogram.num_buckets(), 10);
2033 assert_eq!(histogram.total_rows(), 100);
2034
2035 for bucket in histogram.buckets() {
2037 assert!(bucket.frequency >= 9 && bucket.frequency <= 11);
2038 }
2039 }
2040
2041 #[test]
2042 fn test_histogram_build_skewed() {
2043 let mut values: Vec<f64> = (0..80).map(|i| i as f64).collect();
2045 values.extend((0..20).map(|i| 1000.0 + i as f64));
2046 let histogram = EquiDepthHistogram::build(&values, 5);
2047
2048 assert_eq!(histogram.num_buckets(), 5);
2049 assert_eq!(histogram.total_rows(), 100);
2050
2051 for bucket in histogram.buckets() {
2053 assert!(bucket.frequency >= 18 && bucket.frequency <= 22);
2054 }
2055 }
2056
2057 #[test]
2058 fn test_histogram_range_selectivity_full() {
2059 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2060 let histogram = EquiDepthHistogram::build(&values, 10);
2061
2062 let selectivity = histogram.range_selectivity(None, None);
2064 assert!((selectivity - 1.0).abs() < 0.01);
2065 }
2066
2067 #[test]
2068 fn test_histogram_range_selectivity_half() {
2069 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2070 let histogram = EquiDepthHistogram::build(&values, 10);
2071
2072 let selectivity = histogram.range_selectivity(Some(50.0), None);
2074 assert!(selectivity > 0.4 && selectivity < 0.6);
2075 }
2076
2077 #[test]
2078 fn test_histogram_range_selectivity_quarter() {
2079 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2080 let histogram = EquiDepthHistogram::build(&values, 10);
2081
2082 let selectivity = histogram.range_selectivity(None, Some(25.0));
2084 assert!(selectivity > 0.2 && selectivity < 0.3);
2085 }
2086
2087 #[test]
2088 fn test_histogram_equality_selectivity() {
2089 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2090 let histogram = EquiDepthHistogram::build(&values, 10);
2091
2092 let selectivity = histogram.equality_selectivity(50.0);
2094 assert!(selectivity > 0.005 && selectivity < 0.02);
2095 }
2096
2097 #[test]
2098 fn test_histogram_empty() {
2099 let histogram = EquiDepthHistogram::build(&[], 10);
2100
2101 assert_eq!(histogram.num_buckets(), 0);
2102 assert_eq!(histogram.total_rows(), 0);
2103
2104 let selectivity = histogram.range_selectivity(Some(0.0), Some(100.0));
2106 assert!((selectivity - 0.33).abs() < 0.01);
2107 }
2108
2109 #[test]
2110 fn test_histogram_bucket_overlap() {
2111 let bucket = HistogramBucket::new(10.0, 20.0, 100, 10);
2112
2113 assert!((bucket.overlap_fraction(Some(10.0), Some(20.0)) - 1.0).abs() < 0.01);
2115
2116 assert!((bucket.overlap_fraction(Some(10.0), Some(15.0)) - 0.5).abs() < 0.01);
2118
2119 assert!((bucket.overlap_fraction(Some(15.0), Some(20.0)) - 0.5).abs() < 0.01);
2121
2122 assert!((bucket.overlap_fraction(Some(0.0), Some(5.0))).abs() < 0.01);
2124
2125 assert!((bucket.overlap_fraction(Some(25.0), Some(30.0))).abs() < 0.01);
2127 }
2128
2129 #[test]
2130 fn test_column_stats_from_values() {
2131 let values = vec![10.0, 20.0, 30.0, 40.0, 50.0, 20.0, 30.0, 40.0];
2132 let stats = ColumnStats::from_values(values, 4);
2133
2134 assert_eq!(stats.distinct_count, 5); assert!(stats.min_value.is_some());
2136 assert!((stats.min_value.unwrap() - 10.0).abs() < 0.01);
2137 assert!(stats.max_value.is_some());
2138 assert!((stats.max_value.unwrap() - 50.0).abs() < 0.01);
2139 assert!(stats.histogram.is_some());
2140 }
2141
2142 #[test]
2143 fn test_column_stats_with_histogram_builder() {
2144 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2145 let histogram = EquiDepthHistogram::build(&values, 10);
2146
2147 let stats = ColumnStats::new(100)
2148 .with_range(0.0, 99.0)
2149 .with_histogram(histogram);
2150
2151 assert!(stats.histogram.is_some());
2152 assert_eq!(stats.histogram.as_ref().unwrap().num_buckets(), 10);
2153 }
2154
2155 #[test]
2156 fn test_filter_with_histogram_stats() {
2157 let mut estimator = CardinalityEstimator::new();
2158
2159 let age_values: Vec<f64> = (18..80).map(|i| i as f64).collect();
2161 let histogram = EquiDepthHistogram::build(&age_values, 10);
2162 let age_stats = ColumnStats::new(62)
2163 .with_range(18.0, 79.0)
2164 .with_histogram(histogram);
2165
2166 estimator.add_table_stats(
2167 "Person",
2168 TableStats::new(1000).with_column("age", age_stats),
2169 );
2170
2171 let filter = LogicalOperator::Filter(FilterOp {
2174 predicate: LogicalExpression::Binary {
2175 left: Box::new(LogicalExpression::Property {
2176 variable: "n".to_string(),
2177 property: "age".to_string(),
2178 }),
2179 op: BinaryOp::Gt,
2180 right: Box::new(LogicalExpression::Literal(Value::Int64(50))),
2181 },
2182 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2183 variable: "n".to_string(),
2184 label: Some("Person".to_string()),
2185 input: None,
2186 })),
2187 pushdown_hint: None,
2188 });
2189
2190 let cardinality = estimator.estimate(&filter);
2191
2192 assert!(cardinality > 300.0 && cardinality < 600.0);
2195 }
2196
2197 #[test]
2198 fn test_filter_equality_with_histogram() {
2199 let mut estimator = CardinalityEstimator::new();
2200
2201 let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
2203 let histogram = EquiDepthHistogram::build(&values, 10);
2204 let stats = ColumnStats::new(100)
2205 .with_range(0.0, 99.0)
2206 .with_histogram(histogram);
2207
2208 estimator.add_table_stats("Data", TableStats::new(1000).with_column("value", stats));
2209
2210 let filter = LogicalOperator::Filter(FilterOp {
2212 predicate: LogicalExpression::Binary {
2213 left: Box::new(LogicalExpression::Property {
2214 variable: "d".to_string(),
2215 property: "value".to_string(),
2216 }),
2217 op: BinaryOp::Eq,
2218 right: Box::new(LogicalExpression::Literal(Value::Int64(50))),
2219 },
2220 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2221 variable: "d".to_string(),
2222 label: Some("Data".to_string()),
2223 input: None,
2224 })),
2225 pushdown_hint: None,
2226 });
2227
2228 let cardinality = estimator.estimate(&filter);
2229
2230 assert!((1.0..50.0).contains(&cardinality));
2233 }
2234
2235 #[test]
2236 fn test_histogram_min_max() {
2237 let values: Vec<f64> = vec![5.0, 10.0, 15.0, 20.0, 25.0];
2238 let histogram = EquiDepthHistogram::build(&values, 2);
2239
2240 assert_eq!(histogram.min_value(), Some(5.0));
2241 assert!(histogram.max_value().is_some());
2243 }
2244
2245 #[test]
2248 fn test_selectivity_config_defaults() {
2249 let config = SelectivityConfig::new();
2250 assert!((config.default - 0.1).abs() < f64::EPSILON);
2251 assert!((config.equality - 0.01).abs() < f64::EPSILON);
2252 assert!((config.inequality - 0.99).abs() < f64::EPSILON);
2253 assert!((config.range - 0.33).abs() < f64::EPSILON);
2254 assert!((config.string_ops - 0.1).abs() < f64::EPSILON);
2255 assert!((config.membership - 0.1).abs() < f64::EPSILON);
2256 assert!((config.is_null - 0.05).abs() < f64::EPSILON);
2257 assert!((config.is_not_null - 0.95).abs() < f64::EPSILON);
2258 assert!((config.distinct_fraction - 0.5).abs() < f64::EPSILON);
2259 }
2260
2261 #[test]
2262 fn test_custom_selectivity_config() {
2263 let config = SelectivityConfig {
2264 equality: 0.05,
2265 range: 0.25,
2266 ..SelectivityConfig::new()
2267 };
2268 let estimator = CardinalityEstimator::with_selectivity_config(config);
2269 assert!((estimator.selectivity_config().equality - 0.05).abs() < f64::EPSILON);
2270 assert!((estimator.selectivity_config().range - 0.25).abs() < f64::EPSILON);
2271 }
2272
2273 #[test]
2274 fn test_custom_selectivity_affects_estimation() {
2275 let mut default_est = CardinalityEstimator::new();
2277 default_est.add_table_stats("Person", TableStats::new(1000));
2278
2279 let filter = LogicalOperator::Filter(FilterOp {
2280 predicate: LogicalExpression::Binary {
2281 left: Box::new(LogicalExpression::Property {
2282 variable: "n".to_string(),
2283 property: "name".to_string(),
2284 }),
2285 op: BinaryOp::Eq,
2286 right: Box::new(LogicalExpression::Literal(Value::String("Alix".into()))),
2287 },
2288 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2289 variable: "n".to_string(),
2290 label: Some("Person".to_string()),
2291 input: None,
2292 })),
2293 pushdown_hint: None,
2294 });
2295
2296 let default_card = default_est.estimate(&filter);
2297
2298 let config = SelectivityConfig {
2300 equality: 0.2,
2301 ..SelectivityConfig::new()
2302 };
2303 let mut custom_est = CardinalityEstimator::with_selectivity_config(config);
2304 custom_est.add_table_stats("Person", TableStats::new(1000));
2305
2306 let custom_card = custom_est.estimate(&filter);
2307
2308 assert!(custom_card > default_card);
2309 assert!((custom_card - 200.0).abs() < 1.0);
2310 }
2311
2312 #[test]
2313 fn test_custom_range_selectivity() {
2314 let config = SelectivityConfig {
2315 range: 0.5,
2316 ..SelectivityConfig::new()
2317 };
2318 let mut estimator = CardinalityEstimator::with_selectivity_config(config);
2319 estimator.add_table_stats("Person", TableStats::new(1000));
2320
2321 let filter = LogicalOperator::Filter(FilterOp {
2322 predicate: LogicalExpression::Binary {
2323 left: Box::new(LogicalExpression::Property {
2324 variable: "n".to_string(),
2325 property: "age".to_string(),
2326 }),
2327 op: BinaryOp::Gt,
2328 right: Box::new(LogicalExpression::Literal(Value::Int64(30))),
2329 },
2330 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2331 variable: "n".to_string(),
2332 label: Some("Person".to_string()),
2333 input: None,
2334 })),
2335 pushdown_hint: None,
2336 });
2337
2338 let cardinality = estimator.estimate(&filter);
2339 assert!((cardinality - 500.0).abs() < 1.0);
2341 }
2342
2343 #[test]
2344 fn test_custom_distinct_fraction() {
2345 let config = SelectivityConfig {
2346 distinct_fraction: 0.8,
2347 ..SelectivityConfig::new()
2348 };
2349 let mut estimator = CardinalityEstimator::with_selectivity_config(config);
2350 estimator.add_table_stats("Person", TableStats::new(1000));
2351
2352 let distinct = LogicalOperator::Distinct(DistinctOp {
2353 input: Box::new(LogicalOperator::NodeScan(NodeScanOp {
2354 variable: "n".to_string(),
2355 label: Some("Person".to_string()),
2356 input: None,
2357 })),
2358 columns: None,
2359 });
2360
2361 let cardinality = estimator.estimate(&distinct);
2362 assert!((cardinality - 800.0).abs() < 1.0);
2364 }
2365
2366 #[test]
2369 fn test_estimation_log_basic() {
2370 let mut log = EstimationLog::new(10.0);
2371 log.record("NodeScan(Person)", 1000.0, 1200.0);
2372 log.record("Filter(age > 30)", 100.0, 90.0);
2373
2374 assert_eq!(log.entries().len(), 2);
2375 assert!(!log.should_replan()); }
2377
2378 #[test]
2379 fn test_estimation_log_triggers_replan() {
2380 let mut log = EstimationLog::new(10.0);
2381 log.record("NodeScan(Person)", 100.0, 5000.0); assert!(log.should_replan());
2384 }
2385
2386 #[test]
2387 fn test_estimation_log_overestimate_triggers_replan() {
2388 let mut log = EstimationLog::new(5.0);
2389 log.record("Filter", 1000.0, 100.0); assert!(log.should_replan()); }
2393
2394 #[test]
2395 fn test_estimation_entry_error_ratio() {
2396 let entry = EstimationEntry {
2397 operator: "test".into(),
2398 estimated: 100.0,
2399 actual: 200.0,
2400 };
2401 assert!((entry.error_ratio() - 2.0).abs() < f64::EPSILON);
2402
2403 let perfect = EstimationEntry {
2404 operator: "test".into(),
2405 estimated: 100.0,
2406 actual: 100.0,
2407 };
2408 assert!((perfect.error_ratio() - 1.0).abs() < f64::EPSILON);
2409
2410 let zero_est = EstimationEntry {
2411 operator: "test".into(),
2412 estimated: 0.0,
2413 actual: 0.0,
2414 };
2415 assert!((zero_est.error_ratio() - 1.0).abs() < f64::EPSILON);
2416 }
2417
2418 #[test]
2419 fn test_estimation_log_max_error_ratio() {
2420 let mut log = EstimationLog::new(10.0);
2421 log.record("A", 100.0, 300.0); log.record("B", 100.0, 50.0); log.record("C", 100.0, 100.0); assert!((log.max_error_ratio() - 3.0).abs() < f64::EPSILON);
2426 }
2427
2428 #[test]
2429 fn test_estimation_log_clear() {
2430 let mut log = EstimationLog::new(10.0);
2431 log.record("A", 100.0, 100.0);
2432 assert_eq!(log.entries().len(), 1);
2433
2434 log.clear();
2435 assert!(log.entries().is_empty());
2436 assert!(!log.should_replan());
2437 }
2438
2439 #[test]
2440 fn test_create_estimation_log() {
2441 let log = CardinalityEstimator::create_estimation_log();
2442 assert!(log.entries().is_empty());
2443 assert!(!log.should_replan());
2444 }
2445
2446 #[test]
2447 fn test_equality_selectivity_empty_histogram() {
2448 let hist = EquiDepthHistogram::new(vec![]);
2449 assert_eq!(hist.equality_selectivity(5.0), 0.01);
2451 }
2452
2453 #[test]
2454 fn test_equality_selectivity_value_in_bucket() {
2455 let values: Vec<f64> = (1..=10).map(|i| i as f64).collect();
2456 let hist = EquiDepthHistogram::build(&values, 2);
2457 let sel = hist.equality_selectivity(3.0);
2458 assert!(sel > 0.0);
2459 assert!(sel <= 1.0);
2460 }
2461
2462 #[test]
2463 fn test_equality_selectivity_value_outside_all_buckets() {
2464 let values: Vec<f64> = (1..=10).map(|i| i as f64).collect();
2465 let hist = EquiDepthHistogram::build(&values, 2);
2466 let sel = hist.equality_selectivity(9999.0);
2468 assert_eq!(sel, 0.001);
2469 }
2470
2471 #[test]
2472 fn test_histogram_min_max_empty() {
2473 let hist = EquiDepthHistogram::new(vec![]);
2474 assert_eq!(hist.min_value(), None);
2475 assert_eq!(hist.max_value(), None);
2476 }
2477
2478 #[test]
2479 fn test_histogram_min_max_single_bucket() {
2480 let hist = EquiDepthHistogram::new(vec![HistogramBucket::new(1.0, 10.0, 5, 5)]);
2481 assert_eq!(hist.min_value(), Some(1.0));
2482 assert_eq!(hist.max_value(), Some(10.0));
2483 }
2484
2485 #[test]
2486 fn test_histogram_min_max_multi_bucket() {
2487 let values = vec![1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0];
2488 let hist = EquiDepthHistogram::build(&values, 3);
2489 let min = hist.min_value().unwrap();
2490 let max = hist.max_value().unwrap();
2491 assert!((min - 1.0).abs() < 1e-9, "min should be 1.0, got {min}");
2492 assert!(max >= 20.0, "max should be >= last value, got {max}");
2493 }
2494
2495 #[test]
2496 fn test_count_and_conjuncts_single_expression() {
2497 use crate::query::plan::LogicalExpression;
2498 let expr = LogicalExpression::Literal(Value::Bool(true));
2499 assert_eq!(count_and_conjuncts(&expr), 1);
2500 }
2501
2502 #[test]
2503 fn test_count_and_conjuncts_flat_and() {
2504 use crate::query::plan::{BinaryOp, LogicalExpression};
2505 let expr = LogicalExpression::Binary {
2506 left: Box::new(LogicalExpression::Literal(Value::Bool(true))),
2507 op: BinaryOp::And,
2508 right: Box::new(LogicalExpression::Literal(Value::Bool(false))),
2509 };
2510 assert_eq!(count_and_conjuncts(&expr), 2);
2511 }
2512
2513 #[test]
2514 fn test_count_and_conjuncts_nested_and() {
2515 use crate::query::plan::{BinaryOp, LogicalExpression};
2516 let ab = LogicalExpression::Binary {
2517 left: Box::new(LogicalExpression::Literal(Value::Bool(true))),
2518 op: BinaryOp::And,
2519 right: Box::new(LogicalExpression::Literal(Value::Bool(false))),
2520 };
2521 let cd = LogicalExpression::Binary {
2522 left: Box::new(LogicalExpression::Literal(Value::Int64(1))),
2523 op: BinaryOp::And,
2524 right: Box::new(LogicalExpression::Literal(Value::Int64(2))),
2525 };
2526 let expr = LogicalExpression::Binary {
2527 left: Box::new(ab),
2528 op: BinaryOp::And,
2529 right: Box::new(cd),
2530 };
2531 assert_eq!(count_and_conjuncts(&expr), 4);
2532 }
2533
2534 #[test]
2535 fn test_count_distinct_empty() {
2536 assert_eq!(count_distinct(&[]), 0);
2537 }
2538
2539 #[test]
2540 fn test_count_distinct_all_unique() {
2541 assert_eq!(count_distinct(&[1.0, 2.0, 3.0, 4.0]), 4);
2542 }
2543
2544 #[test]
2545 fn test_count_distinct_with_duplicates() {
2546 assert_eq!(count_distinct(&[1.0, 1.0, 2.0, 2.0, 3.0]), 3);
2547 }
2548
2549 #[test]
2550 fn test_count_distinct_all_same() {
2551 assert_eq!(count_distinct(&[5.0, 5.0, 5.0]), 1);
2552 }
2553
2554 #[test]
2555 fn test_count_distinct_single_value() {
2556 assert_eq!(count_distinct(&[42.0]), 1);
2557 }
2558}