Skip to main content

oxirs_arq/optimizer/
join_order.rs

1//! Adaptive Join Ordering with Cost-Based Optimization
2//!
3//! This module implements cost-based join reordering for SPARQL query optimization.
4//! It uses selectivity estimation and bound variable counting to order triple patterns
5//! so that the most selective patterns execute first, reducing intermediate result sizes.
6//!
7//! # Key Types
8//!
9//! - [`JoinOrderOptimizer`] — stateless optimizer that reorders triple patterns
10//! - [`CardinalityEstimate`] — min/max/expected cardinality range
11//! - [`PatternCost`] — cost estimate combining cardinality and selectivity
12//! - [`JoinGraphStats`] — graph statistics used for estimation
13//! - [`AdaptiveQueryPlan`] — runtime-adaptive plan that can reorder itself
14
15use crate::algebra::{Term, TriplePattern};
16use std::collections::HashMap;
17use std::time::Instant;
18
19// ---------------------------------------------------------------------------
20// Cardinality / Cost types
21// ---------------------------------------------------------------------------
22
23/// Estimated cardinality range for a triple pattern or join result.
24#[derive(Debug, Clone, PartialEq, Eq)]
25pub struct CardinalityEstimate {
26    /// Minimum possible result count (optimistic).
27    pub min: u64,
28    /// Maximum possible result count (pessimistic).
29    pub max: u64,
30    /// Expected result count (used for join ordering decisions).
31    pub expected: u64,
32}
33
34impl CardinalityEstimate {
35    /// Create a new estimate with explicit min/max/expected.
36    pub fn new(min: u64, max: u64, expected: u64) -> Self {
37        Self { min, max, expected }
38    }
39
40    /// Single-point estimate (min == max == expected).
41    pub fn exact(count: u64) -> Self {
42        Self {
43            min: count,
44            max: count,
45            expected: count,
46        }
47    }
48
49    /// Unknown estimate — uses full triple count as upper bound.
50    pub fn unknown(triple_count: u64) -> Self {
51        Self {
52            min: 0,
53            max: triple_count,
54            expected: triple_count / 2 + 1,
55        }
56    }
57}
58
59impl std::fmt::Display for CardinalityEstimate {
60    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61        write!(f, "[{}..{}, ~{}]", self.min, self.max, self.expected)
62    }
63}
64
65/// Cost estimate for a single triple pattern.
66#[derive(Debug, Clone)]
67pub struct PatternCost {
68    /// Estimated cardinality of the result set.
69    pub estimated_cardinality: u64,
70    /// Selectivity factor in (0.0, 1.0] — lower is more selective.
71    pub selectivity: f64,
72    /// Number of bound (non-variable) terms in the pattern (0..=3).
73    pub bound_count: u8,
74}
75
76impl PatternCost {
77    /// Composite score used for greedy ordering: lower is better (execute first).
78    ///
79    /// Combines cardinality and selectivity, biased toward patterns with more bound terms.
80    pub fn ordering_score(&self) -> f64 {
81        let base = (self.estimated_cardinality as f64 + 1.0) * self.selectivity;
82        // Patterns with more bound terms score lower (executed earlier).
83        let bound_bias = 1.0 / (1.0 + self.bound_count as f64);
84        base * bound_bias
85    }
86}
87
88// ---------------------------------------------------------------------------
89// Graph statistics
90// ---------------------------------------------------------------------------
91
92/// Graph statistics used by the join-order optimizer for cardinality estimation.
93///
94/// Intentionally named `JoinGraphStats` to avoid collision with
95/// `analytics::GraphStats`.
96#[derive(Debug, Clone, Default)]
97pub struct JoinGraphStats {
98    /// Total number of triples in the graph.
99    pub triple_count: u64,
100    /// Number of distinct subjects.
101    pub distinct_subjects: u64,
102    /// Number of distinct predicates.
103    pub distinct_predicates: u64,
104    /// Number of distinct objects.
105    pub distinct_objects: u64,
106    /// Per-predicate triple counts (predicate URI → count).
107    pub per_predicate_counts: HashMap<String, u64>,
108}
109
110impl JoinGraphStats {
111    /// Create a new stats object with the given triple count.
112    pub fn new(triple_count: u64) -> Self {
113        Self {
114            triple_count,
115            distinct_subjects: (triple_count / 3).max(1),
116            distinct_predicates: (triple_count / 10).max(1),
117            distinct_objects: (triple_count / 2).max(1),
118            per_predicate_counts: HashMap::new(),
119        }
120    }
121
122    /// Create fully specified stats.
123    pub fn with_details(
124        triple_count: u64,
125        distinct_subjects: u64,
126        distinct_predicates: u64,
127        distinct_objects: u64,
128        per_predicate_counts: HashMap<String, u64>,
129    ) -> Self {
130        Self {
131            triple_count,
132            distinct_subjects,
133            distinct_predicates,
134            distinct_objects,
135            per_predicate_counts,
136        }
137    }
138
139    /// Insert a predicate count entry.
140    pub fn add_predicate_count(&mut self, predicate: impl Into<String>, count: u64) {
141        self.per_predicate_counts.insert(predicate.into(), count);
142    }
143
144    /// Estimated selectivity of a given predicate (fraction of triples with this predicate).
145    pub fn predicate_selectivity(&self, predicate_uri: &str) -> f64 {
146        if self.triple_count == 0 {
147            return 1.0;
148        }
149        let count = self
150            .per_predicate_counts
151            .get(predicate_uri)
152            .copied()
153            .unwrap_or(self.triple_count / self.distinct_predicates.max(1));
154        (count as f64) / (self.triple_count as f64)
155    }
156}
157
158// ---------------------------------------------------------------------------
159// JoinOrderOptimizer
160// ---------------------------------------------------------------------------
161
162/// Cost-based join reordering optimizer for SPARQL triple patterns.
163///
164/// The optimizer is stateless — it uses `JoinGraphStats` passed at call time
165/// to produce an ordered list of patterns that minimises intermediate result sizes.
166///
167/// # Algorithm
168///
169/// A greedy approach is used:
170/// 1. Compute a `PatternCost` for each pattern based on bound-variable count
171///    and cardinality estimate.
172/// 2. Sort patterns by `PatternCost::ordering_score()` (ascending).
173///
174/// For small pattern lists (≤ 4), a dynamic-programming exhaustive search is
175/// performed instead.
176pub struct JoinOrderOptimizer {
177    /// Maximum number of patterns below which DP exhaustive search is used.
178    dp_threshold: usize,
179}
180
181impl JoinOrderOptimizer {
182    /// Create a new optimizer with default settings.
183    pub fn new() -> Self {
184        Self { dp_threshold: 4 }
185    }
186
187    /// Set the DP threshold.  When `patterns.len() <= dp_threshold`, an exhaustive
188    /// search is performed instead of greedy ordering.
189    pub fn with_dp_threshold(mut self, threshold: usize) -> Self {
190        self.dp_threshold = threshold;
191        self
192    }
193
194    // ------------------------------------------------------------------
195    // Public API
196    // ------------------------------------------------------------------
197
198    /// Count the number of bound (non-variable) terms in a pattern.
199    pub fn bound_variable_count(pattern: &TriplePattern) -> u8 {
200        let subject_bound = !matches!(pattern.subject, Term::Variable(_));
201        let predicate_bound = !matches!(pattern.predicate, Term::Variable(_));
202        let object_bound = !matches!(pattern.object, Term::Variable(_));
203        subject_bound as u8 + predicate_bound as u8 + object_bound as u8
204    }
205
206    /// Estimate the cardinality of a single triple pattern given graph statistics.
207    pub fn estimate_cardinality(
208        pattern: &TriplePattern,
209        stats: &JoinGraphStats,
210    ) -> CardinalityEstimate {
211        if stats.triple_count == 0 {
212            return CardinalityEstimate::exact(0);
213        }
214
215        let bound = Self::bound_variable_count(pattern);
216
217        match bound {
218            3 => {
219                // Fully bound — at most 1 result.
220                CardinalityEstimate::new(0, 1, 1)
221            }
222            2 => {
223                // Two bound terms — highly selective.
224                let expected = match (&pattern.subject, &pattern.predicate, &pattern.object) {
225                    (Term::Variable(_), _, _) => {
226                        // ?s <p> <o>
227                        let pred_sel =
228                            Self::predicate_selectivity_from_term(&pattern.predicate, stats);
229                        ((stats.triple_count as f64 * pred_sel)
230                            / stats.distinct_objects.max(1) as f64) as u64
231                    }
232                    (_, Term::Variable(_), _) => {
233                        // <s> ?p <o>
234                        (stats.triple_count / stats.distinct_subjects.max(1))
235                            / stats.distinct_objects.max(1)
236                    }
237                    (_, _, Term::Variable(_)) => {
238                        // <s> <p> ?o
239                        let pred_sel =
240                            Self::predicate_selectivity_from_term(&pattern.predicate, stats);
241                        (stats.triple_count as f64 * pred_sel
242                            / stats.distinct_subjects.max(1) as f64) as u64
243                    }
244                    _ => 1,
245                };
246                let expected = expected.max(1);
247                CardinalityEstimate::new(0, expected * 2, expected)
248            }
249            1 => {
250                // One bound term — moderately selective.
251                let expected = match (&pattern.subject, &pattern.predicate, &pattern.object) {
252                    (_, Term::Variable(_), Term::Variable(_)) => {
253                        // <s> ?p ?o
254                        stats.triple_count / stats.distinct_subjects.max(1)
255                    }
256                    (Term::Variable(_), _, Term::Variable(_)) => {
257                        // ?s <p> ?o
258                        let pred_sel =
259                            Self::predicate_selectivity_from_term(&pattern.predicate, stats);
260                        (stats.triple_count as f64 * pred_sel) as u64
261                    }
262                    (Term::Variable(_), Term::Variable(_), _) => {
263                        // ?s ?p <o>
264                        stats.triple_count / stats.distinct_objects.max(1)
265                    }
266                    _ => stats.triple_count / 3,
267                };
268                let expected = expected.max(1);
269                CardinalityEstimate::new(0, stats.triple_count, expected)
270            }
271            _ => {
272                // No bound terms — full scan.
273                CardinalityEstimate::unknown(stats.triple_count)
274            }
275        }
276    }
277
278    /// Compute the full cost of a triple pattern.
279    pub fn pattern_cost(pattern: &TriplePattern, stats: &JoinGraphStats) -> PatternCost {
280        let bound_count = Self::bound_variable_count(pattern);
281        let estimate = Self::estimate_cardinality(pattern, stats);
282        let selectivity = if stats.triple_count > 0 {
283            (estimate.expected as f64) / (stats.triple_count as f64)
284        } else {
285            1.0
286        };
287        PatternCost {
288            estimated_cardinality: estimate.expected,
289            selectivity: selectivity.clamp(1e-9, 1.0),
290            bound_count,
291        }
292    }
293
294    /// Reorder triple patterns for optimal join ordering.
295    ///
296    /// Uses exhaustive DP for small lists (≤ `dp_threshold`) and greedy ordering
297    /// for larger lists.
298    pub fn reorder_joins(
299        &self,
300        patterns: &[TriplePattern],
301        stats: &JoinGraphStats,
302    ) -> Vec<TriplePattern> {
303        if patterns.len() <= 1 {
304            return patterns.to_vec();
305        }
306
307        if patterns.len() <= self.dp_threshold {
308            self.dp_reorder(patterns, stats)
309        } else {
310            self.greedy_reorder(patterns, stats)
311        }
312    }
313
314    // ------------------------------------------------------------------
315    // Private helpers
316    // ------------------------------------------------------------------
317
318    /// Greedy join reordering: sort by PatternCost::ordering_score() ascending.
319    fn greedy_reorder(
320        &self,
321        patterns: &[TriplePattern],
322        stats: &JoinGraphStats,
323    ) -> Vec<TriplePattern> {
324        let mut scored: Vec<(f64, TriplePattern)> = patterns
325            .iter()
326            .map(|p| {
327                let cost = Self::pattern_cost(p, stats);
328                (cost.ordering_score(), p.clone())
329            })
330            .collect();
331        scored.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
332        scored.into_iter().map(|(_, p)| p).collect()
333    }
334
335    /// DP exhaustive reordering for small pattern sets (n! permutations, n ≤ 4 → ≤ 24).
336    fn dp_reorder(&self, patterns: &[TriplePattern], stats: &JoinGraphStats) -> Vec<TriplePattern> {
337        let n = patterns.len();
338        if n == 0 {
339            return vec![];
340        }
341
342        let costs: Vec<PatternCost> = patterns
343            .iter()
344            .map(|p| Self::pattern_cost(p, stats))
345            .collect();
346
347        // DP over subsets: dp[mask] = (best_cost, last_pattern_index)
348        let full_mask = (1usize << n) - 1;
349        let mut dp = vec![(f64::INFINITY, usize::MAX); 1 << n];
350        let mut parent = vec![(usize::MAX, usize::MAX); 1 << n];
351        dp[0] = (0.0, usize::MAX);
352
353        for mask in 0..=full_mask {
354            if dp[mask].0 == f64::INFINITY && mask != 0 {
355                continue;
356            }
357            // Position of the next pattern to place (0 = first, n-1 = last).
358            let position = mask.count_ones() as usize;
359            // Weight: patterns placed early carry more influence on total cost
360            // (remaining patterns will join against this result).
361            let weight = (n - position) as f64;
362            for (i, cost) in costs.iter().enumerate() {
363                if mask & (1 << i) != 0 {
364                    continue;
365                }
366                let new_mask = mask | (1 << i);
367                let step_cost = cost.ordering_score() * weight;
368                let new_cost = dp[mask].0 + step_cost;
369                if new_cost < dp[new_mask].0 {
370                    dp[new_mask] = (new_cost, i);
371                    parent[new_mask] = (mask, i);
372                }
373            }
374        }
375
376        // Reconstruct path.
377        let mut order = Vec::with_capacity(n);
378        let mut mask = full_mask;
379        while mask != 0 {
380            let (prev_mask, idx) = parent[mask];
381            if idx == usize::MAX {
382                break;
383            }
384            order.push(idx);
385            mask = prev_mask;
386        }
387        order.reverse();
388        order.iter().map(|&i| patterns[i].clone()).collect()
389    }
390
391    /// Extract predicate selectivity from a Term (if it's a named node).
392    fn predicate_selectivity_from_term(term: &Term, stats: &JoinGraphStats) -> f64 {
393        if let Term::Iri(iri) = term {
394            stats.predicate_selectivity(iri.as_str())
395        } else {
396            1.0 / stats.distinct_predicates.max(1) as f64
397        }
398    }
399}
400
401impl Default for JoinOrderOptimizer {
402    fn default() -> Self {
403        Self::new()
404    }
405}
406
407// ---------------------------------------------------------------------------
408// AdaptiveQueryPlan
409// ---------------------------------------------------------------------------
410
411/// A query plan that can reorder its triple patterns at runtime based on
412/// observed intermediate result sizes.
413///
414/// The plan starts with an optimized static ordering produced by
415/// [`JoinOrderOptimizer`], then checks after each pattern execution whether
416/// the actual cardinality diverges enough from the estimate to warrant
417/// re-ordering the remaining patterns.
418#[derive(Debug, Clone)]
419pub struct AdaptiveQueryPlan {
420    /// Current ordered list of triple patterns.
421    pub patterns: Vec<TriplePattern>,
422    /// Pre-computed cost estimates (same order as `patterns`).
423    pub estimated_costs: Vec<PatternCost>,
424    /// How much actual cardinality must differ from estimate (as a ratio)
425    /// before we trigger a re-order.  Default: 5.0 (5×).
426    pub reorder_threshold: u64,
427    /// Index of the next pattern to execute.
428    current_index: usize,
429    /// Creation timestamp.
430    created_at: Instant,
431}
432
433impl AdaptiveQueryPlan {
434    /// Create a plan from a slice of patterns, immediately optimizing their order.
435    pub fn new(patterns: Vec<TriplePattern>, stats: &JoinGraphStats) -> Self {
436        let optimizer = JoinOrderOptimizer::new();
437        let ordered = optimizer.reorder_joins(&patterns, stats);
438        let estimated_costs = ordered
439            .iter()
440            .map(|p| JoinOrderOptimizer::pattern_cost(p, stats))
441            .collect();
442        Self {
443            patterns: ordered,
444            estimated_costs,
445            reorder_threshold: 5,
446            current_index: 0,
447            created_at: Instant::now(),
448        }
449    }
450
451    /// Set the reorder threshold ratio (actual / expected must exceed this).
452    pub fn with_reorder_threshold(mut self, threshold: u64) -> Self {
453        self.reorder_threshold = threshold;
454        self
455    }
456
457    /// Check whether the observed actual cardinality for `pattern_idx` is far
458    /// enough from the estimate to justify reordering remaining patterns.
459    pub fn should_reorder(&self, actual_cardinality: u64, pattern_idx: usize) -> bool {
460        if pattern_idx >= self.estimated_costs.len() {
461            return false;
462        }
463        let estimated = self.estimated_costs[pattern_idx].estimated_cardinality;
464        if estimated == 0 {
465            return actual_cardinality > 0;
466        }
467        let ratio = if actual_cardinality > estimated {
468            actual_cardinality / estimated
469        } else {
470            estimated / actual_cardinality.max(1)
471        };
472        ratio >= self.reorder_threshold
473    }
474
475    /// Reorder the remaining patterns (starting at `pattern_idx`) given updated
476    /// stats derived from observed intermediate results.
477    pub fn reorder_from(&mut self, pattern_idx: usize, stats: &JoinGraphStats) {
478        if pattern_idx >= self.patterns.len() {
479            return;
480        }
481        let remaining = self.patterns[pattern_idx..].to_vec();
482        let optimizer = JoinOrderOptimizer::new();
483        let reordered = optimizer.reorder_joins(&remaining, stats);
484        let new_costs: Vec<PatternCost> = reordered
485            .iter()
486            .map(|p| JoinOrderOptimizer::pattern_cost(p, stats))
487            .collect();
488        // Splice back.
489        self.patterns.splice(pattern_idx.., reordered);
490        self.estimated_costs.splice(pattern_idx.., new_costs);
491    }
492
493    /// Advance the plan to the next pattern.  Returns the next pattern or `None`
494    /// if the plan is exhausted.
495    pub fn next_pattern(&mut self) -> Option<&TriplePattern> {
496        if self.current_index < self.patterns.len() {
497            let p = &self.patterns[self.current_index];
498            self.current_index += 1;
499            Some(p)
500        } else {
501            None
502        }
503    }
504
505    /// Reset execution cursor to the beginning.
506    pub fn reset(&mut self) {
507        self.current_index = 0;
508    }
509
510    /// Number of patterns remaining (not yet executed).
511    pub fn remaining(&self) -> usize {
512        self.patterns.len().saturating_sub(self.current_index)
513    }
514
515    /// Elapsed time since plan creation.
516    pub fn elapsed(&self) -> std::time::Duration {
517        self.created_at.elapsed()
518    }
519
520    /// Total number of patterns in the plan.
521    pub fn len(&self) -> usize {
522        self.patterns.len()
523    }
524
525    /// Whether the plan has no patterns.
526    pub fn is_empty(&self) -> bool {
527        self.patterns.is_empty()
528    }
529}
530
531// ---------------------------------------------------------------------------
532// Tests
533// ---------------------------------------------------------------------------
534
535#[cfg(test)]
536mod tests {
537    use super::*;
538    use crate::algebra::Term;
539    use oxirs_core::model::NamedNode;
540
541    // Helper to create a Term::Iri from a string.
542    fn iri(s: &str) -> Term {
543        Term::Iri(NamedNode::new(s).expect("valid IRI"))
544    }
545
546    // Helper to create a Term::Variable.
547    fn var(name: &str) -> Term {
548        use oxirs_core::model::Variable;
549        Term::Variable(Variable::new(name).expect("valid variable name"))
550    }
551
552    fn make_pattern(s: Term, p: Term, o: Term) -> TriplePattern {
553        TriplePattern {
554            subject: s,
555            predicate: p,
556            object: o,
557        }
558    }
559
560    fn default_stats() -> JoinGraphStats {
561        let mut stats = JoinGraphStats::new(10_000);
562        stats.add_predicate_count("http://ex.org/type", 500);
563        stats.add_predicate_count("http://ex.org/name", 8_000);
564        stats.add_predicate_count("http://ex.org/rare", 10);
565        stats
566    }
567
568    // ------------------------------------------------------------------
569    // CardinalityEstimate tests
570    // ------------------------------------------------------------------
571
572    #[test]
573    fn test_cardinality_estimate_exact() {
574        let est = CardinalityEstimate::exact(42);
575        assert_eq!(est.min, 42);
576        assert_eq!(est.max, 42);
577        assert_eq!(est.expected, 42);
578    }
579
580    #[test]
581    fn test_cardinality_estimate_unknown() {
582        let est = CardinalityEstimate::unknown(1_000);
583        assert_eq!(est.min, 0);
584        assert!(est.expected > 0);
585        assert!(est.max >= est.expected);
586    }
587
588    #[test]
589    fn test_cardinality_estimate_display() {
590        let est = CardinalityEstimate::new(1, 100, 50);
591        let s = format!("{est}");
592        assert!(s.contains("1..100"));
593    }
594
595    // ------------------------------------------------------------------
596    // JoinGraphStats tests
597    // ------------------------------------------------------------------
598
599    #[test]
600    fn test_graph_stats_new() {
601        let stats = JoinGraphStats::new(1_000);
602        assert_eq!(stats.triple_count, 1_000);
603        assert!(stats.distinct_subjects > 0);
604    }
605
606    #[test]
607    fn test_predicate_selectivity_known() {
608        let mut stats = JoinGraphStats::new(10_000);
609        stats.add_predicate_count("http://ex.org/p", 1_000);
610        let sel = stats.predicate_selectivity("http://ex.org/p");
611        assert!((sel - 0.1).abs() < 1e-9);
612    }
613
614    #[test]
615    fn test_predicate_selectivity_unknown() {
616        let stats = JoinGraphStats::new(10_000);
617        let sel = stats.predicate_selectivity("http://ex.org/unknown");
618        assert!(sel > 0.0 && sel <= 1.0);
619    }
620
621    #[test]
622    fn test_predicate_selectivity_zero_triples() {
623        let stats = JoinGraphStats::new(0);
624        let sel = stats.predicate_selectivity("http://ex.org/p");
625        assert_eq!(sel, 1.0);
626    }
627
628    // ------------------------------------------------------------------
629    // bound_variable_count tests
630    // ------------------------------------------------------------------
631
632    #[test]
633    fn test_bound_count_all_variables() {
634        let p = make_pattern(var("s"), var("p"), var("o"));
635        assert_eq!(JoinOrderOptimizer::bound_variable_count(&p), 0);
636    }
637
638    #[test]
639    fn test_bound_count_one_bound() {
640        let p = make_pattern(iri("http://ex.org/s"), var("p"), var("o"));
641        assert_eq!(JoinOrderOptimizer::bound_variable_count(&p), 1);
642    }
643
644    #[test]
645    fn test_bound_count_two_bound() {
646        let p = make_pattern(iri("http://ex.org/s"), iri("http://ex.org/p"), var("o"));
647        assert_eq!(JoinOrderOptimizer::bound_variable_count(&p), 2);
648    }
649
650    #[test]
651    fn test_bound_count_all_bound() {
652        let p = make_pattern(
653            iri("http://ex.org/s"),
654            iri("http://ex.org/p"),
655            iri("http://ex.org/o"),
656        );
657        assert_eq!(JoinOrderOptimizer::bound_variable_count(&p), 3);
658    }
659
660    // ------------------------------------------------------------------
661    // estimate_cardinality tests
662    // ------------------------------------------------------------------
663
664    #[test]
665    fn test_estimate_fully_bound() {
666        let stats = default_stats();
667        let p = make_pattern(
668            iri("http://ex.org/s"),
669            iri("http://ex.org/p"),
670            iri("http://ex.org/o"),
671        );
672        let est = JoinOrderOptimizer::estimate_cardinality(&p, &stats);
673        assert_eq!(est.max, 1);
674    }
675
676    #[test]
677    fn test_estimate_two_bound_spo() {
678        let stats = default_stats();
679        // <s> <p> ?o  — two bound
680        let p = make_pattern(iri("http://ex.org/s"), iri("http://ex.org/type"), var("o"));
681        let est = JoinOrderOptimizer::estimate_cardinality(&p, &stats);
682        assert!(est.expected > 0);
683    }
684
685    #[test]
686    fn test_estimate_full_scan() {
687        let stats = default_stats();
688        let p = make_pattern(var("s"), var("p"), var("o"));
689        let est = JoinOrderOptimizer::estimate_cardinality(&p, &stats);
690        assert_eq!(est.max, stats.triple_count);
691    }
692
693    #[test]
694    fn test_estimate_zero_graph() {
695        let stats = JoinGraphStats::new(0);
696        let p = make_pattern(var("s"), var("p"), var("o"));
697        let est = JoinOrderOptimizer::estimate_cardinality(&p, &stats);
698        assert_eq!(est.expected, 0);
699    }
700
701    // ------------------------------------------------------------------
702    // pattern_cost tests
703    // ------------------------------------------------------------------
704
705    #[test]
706    fn test_pattern_cost_ordering_score_fully_bound_lowest() {
707        let stats = default_stats();
708        let fully_bound = make_pattern(
709            iri("http://ex.org/s"),
710            iri("http://ex.org/p"),
711            iri("http://ex.org/o"),
712        );
713        let full_scan = make_pattern(var("s"), var("p"), var("o"));
714        let cost_bound = JoinOrderOptimizer::pattern_cost(&fully_bound, &stats);
715        let cost_scan = JoinOrderOptimizer::pattern_cost(&full_scan, &stats);
716        assert!(
717            cost_bound.ordering_score() < cost_scan.ordering_score(),
718            "fully bound should score lower than full scan"
719        );
720    }
721
722    #[test]
723    fn test_pattern_cost_selectivity_range() {
724        let stats = default_stats();
725        let p = make_pattern(var("s"), iri("http://ex.org/type"), var("o"));
726        let cost = JoinOrderOptimizer::pattern_cost(&p, &stats);
727        assert!(cost.selectivity > 0.0);
728        assert!(cost.selectivity <= 1.0);
729    }
730
731    // ------------------------------------------------------------------
732    // reorder_joins tests
733    // ------------------------------------------------------------------
734
735    #[test]
736    fn test_reorder_empty() {
737        let optimizer = JoinOrderOptimizer::new();
738        let stats = default_stats();
739        let result = optimizer.reorder_joins(&[], &stats);
740        assert!(result.is_empty());
741    }
742
743    #[test]
744    fn test_reorder_single() {
745        let optimizer = JoinOrderOptimizer::new();
746        let stats = default_stats();
747        let p = make_pattern(var("s"), var("p"), var("o"));
748        let result = optimizer.reorder_joins(std::slice::from_ref(&p), &stats);
749        assert_eq!(result.len(), 1);
750        assert_eq!(result[0], p);
751    }
752
753    #[test]
754    fn test_reorder_places_selective_first() {
755        let optimizer = JoinOrderOptimizer::new();
756        let stats = default_stats();
757
758        // Fully bound = most selective.
759        let fully_bound = make_pattern(
760            iri("http://ex.org/s"),
761            iri("http://ex.org/type"),
762            iri("http://ex.org/o"),
763        );
764        // Full scan = least selective.
765        let full_scan = make_pattern(var("s"), var("p"), var("o"));
766        // One bound term.
767        let one_bound = make_pattern(iri("http://ex.org/s"), var("p"), var("o"));
768
769        let patterns = vec![full_scan.clone(), one_bound.clone(), fully_bound.clone()];
770        let result = optimizer.reorder_joins(&patterns, &stats);
771
772        assert_eq!(result.len(), 3);
773        assert_eq!(result[0], fully_bound, "fully bound should come first");
774    }
775
776    #[test]
777    fn test_reorder_greedy_large_list() {
778        let optimizer = JoinOrderOptimizer::new().with_dp_threshold(2);
779        let stats = default_stats();
780
781        let patterns: Vec<TriplePattern> = (0..6)
782            .map(|i| {
783                if i % 2 == 0 {
784                    make_pattern(
785                        iri(&format!("http://ex.org/s{i}")),
786                        iri("http://ex.org/type"),
787                        var("o"),
788                    )
789                } else {
790                    make_pattern(var("s"), var("p"), var("o"))
791                }
792            })
793            .collect();
794
795        let result = optimizer.reorder_joins(&patterns, &stats);
796        assert_eq!(result.len(), 6);
797        // First result should have a lower ordering score (more selective)
798        let first_cost = JoinOrderOptimizer::pattern_cost(&result[0], &stats);
799        let last_cost = JoinOrderOptimizer::pattern_cost(&result[5], &stats);
800        assert!(first_cost.ordering_score() <= last_cost.ordering_score());
801    }
802
803    #[test]
804    fn test_reorder_dp_vs_greedy_equivalence_small() {
805        // For a small list, DP and greedy should both find a reasonable ordering.
806        let optimizer_dp = JoinOrderOptimizer::new().with_dp_threshold(5);
807        let optimizer_greedy = JoinOrderOptimizer::new().with_dp_threshold(0);
808        let stats = default_stats();
809
810        let patterns = vec![
811            make_pattern(var("s"), var("p"), var("o")),
812            make_pattern(iri("http://ex.org/s"), iri("http://ex.org/type"), var("o")),
813            make_pattern(iri("http://ex.org/s"), var("p"), var("o")),
814        ];
815
816        let dp_result = optimizer_dp.reorder_joins(&patterns, &stats);
817        let greedy_result = optimizer_greedy.reorder_joins(&patterns, &stats);
818
819        // Both should have the same length.
820        assert_eq!(dp_result.len(), greedy_result.len());
821    }
822
823    // ------------------------------------------------------------------
824    // AdaptiveQueryPlan tests
825    // ------------------------------------------------------------------
826
827    #[test]
828    fn test_adaptive_plan_creation() {
829        let stats = default_stats();
830        let patterns = vec![
831            make_pattern(var("s"), var("p"), var("o")),
832            make_pattern(iri("http://ex.org/s"), iri("http://ex.org/type"), var("o")),
833        ];
834        let plan = AdaptiveQueryPlan::new(patterns, &stats);
835        assert_eq!(plan.len(), 2);
836        assert!(!plan.is_empty());
837    }
838
839    #[test]
840    fn test_adaptive_plan_should_reorder_large_divergence() {
841        let stats = default_stats();
842        let patterns = vec![make_pattern(
843            iri("http://ex.org/s"),
844            iri("http://ex.org/type"),
845            var("o"),
846        )];
847        let plan = AdaptiveQueryPlan::new(patterns, &stats).with_reorder_threshold(5);
848
849        // Estimated cardinality for that pattern should be small; actual is huge.
850        let actual = plan.estimated_costs[0].estimated_cardinality * 100;
851        assert!(plan.should_reorder(actual, 0));
852    }
853
854    #[test]
855    fn test_adaptive_plan_should_not_reorder_small_divergence() {
856        let stats = default_stats();
857        let patterns = vec![make_pattern(
858            iri("http://ex.org/s"),
859            iri("http://ex.org/type"),
860            var("o"),
861        )];
862        let plan = AdaptiveQueryPlan::new(patterns, &stats).with_reorder_threshold(10);
863
864        let estimated = plan.estimated_costs[0].estimated_cardinality;
865        // Actual is only 2× the estimate — below threshold.
866        let actual = estimated * 2;
867        assert!(!plan.should_reorder(actual, 0));
868    }
869
870    #[test]
871    fn test_adaptive_plan_reorder_from() {
872        let stats = default_stats();
873        let patterns = vec![
874            make_pattern(var("s"), var("p"), var("o")),
875            make_pattern(var("s2"), var("p2"), var("o2")),
876            make_pattern(iri("http://ex.org/s"), iri("http://ex.org/type"), var("o")),
877        ];
878        let mut plan = AdaptiveQueryPlan::new(patterns, &stats);
879        // Reorder remaining from index 1.
880        plan.reorder_from(1, &stats);
881        assert_eq!(plan.len(), 3);
882    }
883
884    #[test]
885    fn test_adaptive_plan_next_pattern() {
886        let stats = default_stats();
887        let patterns = vec![
888            make_pattern(var("s"), var("p"), var("o")),
889            make_pattern(iri("http://ex.org/s"), var("p"), var("o")),
890        ];
891        let mut plan = AdaptiveQueryPlan::new(patterns, &stats);
892        assert!(plan.next_pattern().is_some());
893        assert!(plan.next_pattern().is_some());
894        assert!(plan.next_pattern().is_none());
895    }
896
897    #[test]
898    fn test_adaptive_plan_reset() {
899        let stats = default_stats();
900        let patterns = vec![make_pattern(var("s"), var("p"), var("o"))];
901        let mut plan = AdaptiveQueryPlan::new(patterns, &stats);
902        plan.next_pattern();
903        assert_eq!(plan.remaining(), 0);
904        plan.reset();
905        assert_eq!(plan.remaining(), 1);
906    }
907
908    #[test]
909    fn test_adaptive_plan_remaining() {
910        let stats = default_stats();
911        let patterns = vec![
912            make_pattern(var("s"), var("p"), var("o")),
913            make_pattern(var("a"), var("b"), var("c")),
914            make_pattern(var("x"), var("y"), var("z")),
915        ];
916        let mut plan = AdaptiveQueryPlan::new(patterns, &stats);
917        assert_eq!(plan.remaining(), 3);
918        plan.next_pattern();
919        assert_eq!(plan.remaining(), 2);
920    }
921
922    #[test]
923    fn test_adaptive_plan_elapsed() {
924        let stats = default_stats();
925        let patterns = vec![make_pattern(var("s"), var("p"), var("o"))];
926        let plan = AdaptiveQueryPlan::new(patterns, &stats);
927        // Elapsed should be very small (just created).
928        assert!(plan.elapsed().as_secs() < 5);
929    }
930
931    #[test]
932    fn test_adaptive_plan_out_of_bounds_reorder() {
933        let stats = default_stats();
934        let patterns = vec![make_pattern(var("s"), var("p"), var("o"))];
935        let mut plan = AdaptiveQueryPlan::new(patterns, &stats);
936        // Should not panic for out-of-bounds index.
937        plan.reorder_from(100, &stats);
938        assert_eq!(plan.len(), 1);
939    }
940
941    #[test]
942    fn test_adaptive_plan_should_reorder_zero_estimated() {
943        let stats = JoinGraphStats::new(0);
944        let patterns = vec![make_pattern(var("s"), var("p"), var("o"))];
945        let plan = AdaptiveQueryPlan::new(patterns, &stats);
946        // With zero estimated cardinality, actual > 0 triggers reorder.
947        assert!(plan.should_reorder(1, 0));
948        // actual == 0 does not trigger.
949        assert!(!plan.should_reorder(0, 0));
950    }
951}