oxirs_arq/
path_extensions.rs

1//! Extended Property Path Support for SPARQL
2//!
3//! This module extends the basic property path functionality with advanced
4//! optimization, caching, analysis, and specialized evaluation strategies.
5
6use crate::algebra::Term;
7use crate::path::{PathContext, PropertyPath};
8use anyhow::Result;
9use dashmap::DashMap;
10use std::collections::{HashMap, HashSet, VecDeque};
11use std::sync::Arc;
12
13/// Path cache for memoization of path evaluation results
14pub struct PathCache {
15    cache: DashMap<PathCacheKey, PathCacheEntry>,
16    max_entries: usize,
17    hits: Arc<std::sync::atomic::AtomicUsize>,
18    misses: Arc<std::sync::atomic::AtomicUsize>,
19}
20
21#[derive(Debug, Clone, Hash, PartialEq, Eq)]
22struct PathCacheKey {
23    path: PropertyPath,
24    start: String,       // Serialized term
25    end: Option<String>, // Serialized term (optional for reachability)
26}
27
28#[derive(Debug, Clone)]
29struct PathCacheEntry {
30    reachable: bool,
31    #[allow(dead_code)]
32    intermediate_nodes: Vec<String>,
33    #[allow(dead_code)]
34    path_length: usize,
35    timestamp: std::time::Instant,
36}
37
38impl PathCache {
39    /// Create a new path cache with default size
40    pub fn new() -> Self {
41        Self::with_capacity(10000)
42    }
43
44    /// Create a new path cache with specified capacity
45    pub fn with_capacity(max_entries: usize) -> Self {
46        Self {
47            cache: DashMap::new(),
48            max_entries,
49            hits: Arc::new(std::sync::atomic::AtomicUsize::new(0)),
50            misses: Arc::new(std::sync::atomic::AtomicUsize::new(0)),
51        }
52    }
53
54    /// Check if path is cached
55    pub fn get(&self, path: &PropertyPath, start: &Term, end: Option<&Term>) -> Option<bool> {
56        let key = PathCacheKey {
57            path: path.clone(),
58            start: format!("{:?}", start),
59            end: end.map(|t| format!("{:?}", t)),
60        };
61
62        if let Some(entry) = self.cache.get(&key) {
63            self.hits.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
64            Some(entry.reachable)
65        } else {
66            self.misses
67                .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
68            None
69        }
70    }
71
72    /// Cache a path evaluation result
73    pub fn insert(&self, path: &PropertyPath, start: &Term, end: Option<&Term>, reachable: bool) {
74        // Simple eviction: if at capacity, clear 10% of oldest entries
75        if self.cache.len() >= self.max_entries {
76            self.evict_oldest(self.max_entries / 10);
77        }
78
79        let key = PathCacheKey {
80            path: path.clone(),
81            start: format!("{:?}", start),
82            end: end.map(|t| format!("{:?}", t)),
83        };
84
85        let entry = PathCacheEntry {
86            reachable,
87            intermediate_nodes: Vec::new(),
88            path_length: 0,
89            timestamp: std::time::Instant::now(),
90        };
91
92        self.cache.insert(key, entry);
93    }
94
95    /// Clear the cache
96    pub fn clear(&self) {
97        self.cache.clear();
98    }
99
100    /// Get cache statistics
101    pub fn stats(&self) -> CacheStats {
102        let hits = self.hits.load(std::sync::atomic::Ordering::Relaxed);
103        let misses = self.misses.load(std::sync::atomic::Ordering::Relaxed);
104        let total = hits + misses;
105        let hit_rate = if total > 0 {
106            hits as f64 / total as f64
107        } else {
108            0.0
109        };
110
111        CacheStats {
112            hits,
113            misses,
114            size: self.cache.len(),
115            capacity: self.max_entries,
116            hit_rate,
117        }
118    }
119
120    fn evict_oldest(&self, count: usize) {
121        let mut entries: Vec<_> = self.cache.iter().collect();
122        entries.sort_by_key(|e| e.value().timestamp);
123
124        for entry in entries.iter().take(count) {
125            self.cache.remove(entry.key());
126        }
127    }
128}
129
130impl Default for PathCache {
131    fn default() -> Self {
132        Self::new()
133    }
134}
135
136/// Cache statistics
137#[derive(Debug, Clone)]
138pub struct CacheStats {
139    pub hits: usize,
140    pub misses: usize,
141    pub size: usize,
142    pub capacity: usize,
143    pub hit_rate: f64,
144}
145
146/// Bidirectional path search for faster reachability queries
147pub struct BidirectionalPathSearch {
148    context: PathContext,
149}
150
151impl BidirectionalPathSearch {
152    /// Create new bidirectional search
153    pub fn new(context: PathContext) -> Self {
154        Self { context }
155    }
156
157    /// Search for path from start to end using bidirectional BFS
158    #[allow(unused_variables)]
159    pub fn search(
160        &self,
161        path: &PropertyPath,
162        start: &Term,
163        end: &Term,
164        dataset: &HashMap<Term, Vec<(Term, Term)>>,
165    ) -> Result<Option<Vec<Term>>> {
166        // Forward search from start
167        let mut forward_visited: HashMap<Term, Option<Term>> = HashMap::new();
168        let mut forward_queue = VecDeque::new();
169        forward_queue.push_back(start.clone());
170        forward_visited.insert(start.clone(), None);
171
172        // Backward search from end
173        let mut backward_visited: HashMap<Term, Option<Term>> = HashMap::new();
174        let mut backward_queue = VecDeque::new();
175        backward_queue.push_back(end.clone());
176        backward_visited.insert(end.clone(), None);
177
178        let mut iterations = 0;
179        let max_iterations = self.context.max_nodes;
180
181        while !forward_queue.is_empty() && !backward_queue.is_empty() && iterations < max_iterations
182        {
183            iterations += 1;
184
185            // Expand forward frontier (smaller first)
186            if forward_queue.len() <= backward_queue.len() {
187                if let Some(meeting_point) = self.expand_frontier(
188                    &mut forward_queue,
189                    &mut forward_visited,
190                    &backward_visited,
191                    dataset,
192                    false,
193                )? {
194                    return Ok(Some(self.reconstruct_path(
195                        &meeting_point,
196                        &forward_visited,
197                        &backward_visited,
198                        start,
199                        end,
200                    )));
201                }
202            } else {
203                // Expand backward frontier
204                if let Some(meeting_point) = self.expand_frontier(
205                    &mut backward_queue,
206                    &mut backward_visited,
207                    &forward_visited,
208                    dataset,
209                    true,
210                )? {
211                    return Ok(Some(self.reconstruct_path(
212                        &meeting_point,
213                        &forward_visited,
214                        &backward_visited,
215                        start,
216                        end,
217                    )));
218                }
219            }
220        }
221
222        Ok(None)
223    }
224
225    fn expand_frontier(
226        &self,
227        queue: &mut VecDeque<Term>,
228        visited: &mut HashMap<Term, Option<Term>>,
229        other_visited: &HashMap<Term, Option<Term>>,
230        dataset: &HashMap<Term, Vec<(Term, Term)>>,
231        _reverse: bool,
232    ) -> Result<Option<Term>> {
233        if let Some(current) = queue.pop_front() {
234            // Check if we've met the other search
235            if other_visited.contains_key(&current) {
236                return Ok(Some(current));
237            }
238
239            // Expand neighbors
240            if let Some(neighbors) = dataset.get(&current) {
241                for (predicate, neighbor) in neighbors {
242                    let _ = predicate; // Use predicate for path matching in real impl
243                    if !visited.contains_key(neighbor) {
244                        visited.insert(neighbor.clone(), Some(current.clone()));
245                        queue.push_back(neighbor.clone());
246                    }
247                }
248            }
249        }
250
251        Ok(None)
252    }
253
254    fn reconstruct_path(
255        &self,
256        meeting_point: &Term,
257        forward_visited: &HashMap<Term, Option<Term>>,
258        backward_visited: &HashMap<Term, Option<Term>>,
259        start: &Term,
260        end: &Term,
261    ) -> Vec<Term> {
262        let mut path = Vec::new();
263
264        // Build forward path
265        let mut current = meeting_point.clone();
266        let mut forward_path = Vec::new();
267        while let Some(Some(prev)) = forward_visited.get(&current) {
268            forward_path.push(current.clone());
269            current = prev.clone();
270            if current == *start {
271                forward_path.push(current.clone());
272                break;
273            }
274        }
275        forward_path.reverse();
276        path.extend(forward_path);
277
278        // Build backward path
279        current = meeting_point.clone();
280        while let Some(Some(next)) = backward_visited.get(&current) {
281            current = next.clone();
282            path.push(current.clone());
283            if current == *end {
284                break;
285            }
286        }
287
288        path
289    }
290}
291
292/// Path pattern analyzer for query optimization
293pub struct PathAnalyzer;
294
295impl PathAnalyzer {
296    /// Analyze path complexity
297    pub fn analyze_complexity(path: &PropertyPath) -> PathComplexity {
298        match path {
299            PropertyPath::Direct(_) => PathComplexity {
300                depth: 1,
301                branching_factor: 1,
302                has_recursion: false,
303                has_negation: false,
304                estimated_cost: 1.0,
305            },
306            PropertyPath::Inverse(inner) => {
307                let mut complexity = Self::analyze_complexity(inner);
308                complexity.estimated_cost *= 1.2; // Inverse slightly more expensive
309                complexity
310            }
311            PropertyPath::Sequence(left, right) => {
312                let left_complexity = Self::analyze_complexity(left);
313                let right_complexity = Self::analyze_complexity(right);
314                PathComplexity {
315                    depth: left_complexity.depth + right_complexity.depth,
316                    branching_factor: left_complexity
317                        .branching_factor
318                        .max(right_complexity.branching_factor),
319                    has_recursion: left_complexity.has_recursion || right_complexity.has_recursion,
320                    has_negation: left_complexity.has_negation || right_complexity.has_negation,
321                    estimated_cost: left_complexity.estimated_cost
322                        * right_complexity.estimated_cost,
323                }
324            }
325            PropertyPath::Alternative(left, right) => {
326                let left_complexity = Self::analyze_complexity(left);
327                let right_complexity = Self::analyze_complexity(right);
328                PathComplexity {
329                    depth: left_complexity.depth.max(right_complexity.depth),
330                    branching_factor: left_complexity.branching_factor
331                        + right_complexity.branching_factor,
332                    has_recursion: left_complexity.has_recursion || right_complexity.has_recursion,
333                    has_negation: left_complexity.has_negation || right_complexity.has_negation,
334                    estimated_cost: left_complexity.estimated_cost
335                        + right_complexity.estimated_cost,
336                }
337            }
338            PropertyPath::ZeroOrMore(inner) | PropertyPath::OneOrMore(inner) => {
339                let mut complexity = Self::analyze_complexity(inner);
340                complexity.depth *= 10; // Assume average of 10 iterations
341                complexity.has_recursion = true;
342                complexity.estimated_cost *= 100.0; // Recursive paths are expensive
343                complexity
344            }
345            PropertyPath::ZeroOrOne(inner) => {
346                let mut complexity = Self::analyze_complexity(inner);
347                complexity.branching_factor += 1; // Optional adds one branch
348                complexity.estimated_cost *= 1.5;
349                complexity
350            }
351            PropertyPath::NegatedPropertySet(_) => PathComplexity {
352                depth: 1,
353                branching_factor: 1,
354                has_recursion: false,
355                has_negation: true,
356                estimated_cost: 2.0, // Negation requires filtering
357            },
358        }
359    }
360
361    /// Suggest optimization for path
362    pub fn suggest_optimization(path: &PropertyPath) -> Vec<PathOptimizationHint> {
363        let complexity = Self::analyze_complexity(path);
364        let mut hints = Vec::new();
365
366        if complexity.has_recursion && complexity.depth > 100 {
367            hints.push(PathOptimizationHint::LimitRecursionDepth);
368        }
369
370        if complexity.branching_factor > 10 {
371            hints.push(PathOptimizationHint::UseIndexes);
372        }
373
374        if matches!(path, PropertyPath::Alternative(_, _)) {
375            hints.push(PathOptimizationHint::ReorderAlternatives);
376        }
377
378        if complexity.estimated_cost > 1000.0 {
379            hints.push(PathOptimizationHint::ConsiderMaterialization);
380        }
381
382        hints
383    }
384
385    /// Check if path is deterministic (always returns same results)
386    pub fn is_deterministic(path: &PropertyPath) -> bool {
387        match path {
388            PropertyPath::Direct(_)
389            | PropertyPath::Inverse(_)
390            | PropertyPath::Sequence(_, _)
391            | PropertyPath::Alternative(_, _)
392            | PropertyPath::ZeroOrMore(_)
393            | PropertyPath::OneOrMore(_)
394            | PropertyPath::ZeroOrOne(_) => true,
395            PropertyPath::NegatedPropertySet(_) => true,
396        }
397    }
398
399    /// Extract all predicates used in path
400    pub fn extract_predicates(path: &PropertyPath) -> HashSet<Term> {
401        let mut predicates = HashSet::new();
402        Self::extract_predicates_recursive(path, &mut predicates);
403        predicates
404    }
405
406    fn extract_predicates_recursive(path: &PropertyPath, predicates: &mut HashSet<Term>) {
407        match path {
408            PropertyPath::Direct(term) => {
409                predicates.insert(term.clone());
410            }
411            PropertyPath::Inverse(inner)
412            | PropertyPath::ZeroOrMore(inner)
413            | PropertyPath::OneOrMore(inner)
414            | PropertyPath::ZeroOrOne(inner) => {
415                Self::extract_predicates_recursive(inner, predicates);
416            }
417            PropertyPath::Sequence(left, right) | PropertyPath::Alternative(left, right) => {
418                Self::extract_predicates_recursive(left, predicates);
419                Self::extract_predicates_recursive(right, predicates);
420            }
421            PropertyPath::NegatedPropertySet(terms) => {
422                for term in terms {
423                    predicates.insert(term.clone());
424                }
425            }
426        }
427    }
428}
429
430/// Path complexity metrics
431#[derive(Debug, Clone)]
432pub struct PathComplexity {
433    pub depth: usize,
434    pub branching_factor: usize,
435    pub has_recursion: bool,
436    pub has_negation: bool,
437    pub estimated_cost: f64,
438}
439
440/// Path optimization hints
441#[derive(Debug, Clone, PartialEq, Eq)]
442pub enum PathOptimizationHint {
443    LimitRecursionDepth,
444    UseIndexes,
445    ReorderAlternatives,
446    ConsiderMaterialization,
447    CacheResults,
448    UseBidirectionalSearch,
449}
450
451/// Specialized path evaluator with caching and optimization
452pub struct CachedPathEvaluator {
453    cache: Arc<PathCache>,
454    bidirectional: bool,
455}
456
457impl CachedPathEvaluator {
458    /// Create new cached evaluator
459    pub fn new() -> Self {
460        Self {
461            cache: Arc::new(PathCache::new()),
462            bidirectional: true,
463        }
464    }
465
466    /// Create with custom cache
467    pub fn with_cache(cache: Arc<PathCache>) -> Self {
468        Self {
469            cache,
470            bidirectional: true,
471        }
472    }
473
474    /// Enable or disable bidirectional search
475    pub fn set_bidirectional(&mut self, enabled: bool) {
476        self.bidirectional = enabled;
477    }
478
479    /// Evaluate path with caching
480    pub fn evaluate(
481        &self,
482        path: &PropertyPath,
483        start: &Term,
484        end: Option<&Term>,
485        dataset: &HashMap<Term, Vec<(Term, Term)>>,
486    ) -> Result<bool> {
487        // Check cache first
488        if let Some(cached) = self.cache.get(path, start, end) {
489            return Ok(cached);
490        }
491
492        // Evaluate path
493        let reachable = self.evaluate_uncached(path, start, end, dataset)?;
494
495        // Cache result
496        self.cache.insert(path, start, end, reachable);
497
498        Ok(reachable)
499    }
500
501    fn evaluate_uncached(
502        &self,
503        path: &PropertyPath,
504        start: &Term,
505        end: Option<&Term>,
506        dataset: &HashMap<Term, Vec<(Term, Term)>>,
507    ) -> Result<bool> {
508        // Simplified evaluation - in real implementation would use full path semantics
509        match path {
510            PropertyPath::Direct(predicate) => {
511                if let Some(neighbors) = dataset.get(start) {
512                    for (pred, neighbor) in neighbors {
513                        if pred == predicate {
514                            if let Some(target) = end {
515                                if neighbor == target {
516                                    return Ok(true);
517                                }
518                            } else {
519                                return Ok(true);
520                            }
521                        }
522                    }
523                }
524                Ok(false)
525            }
526            PropertyPath::ZeroOrMore(_) => {
527                // Zero-or-more always reaches start node
528                if end.is_none() {
529                    return Ok(true);
530                }
531                if let Some(target) = end {
532                    if start == target {
533                        return Ok(true);
534                    }
535                }
536                // Would need full BFS/DFS here
537                Ok(false)
538            }
539            _ => {
540                // Other path types - simplified
541                Ok(false)
542            }
543        }
544    }
545
546    /// Get cache statistics
547    pub fn cache_stats(&self) -> CacheStats {
548        self.cache.stats()
549    }
550
551    /// Clear cache
552    pub fn clear_cache(&self) {
553        self.cache.clear();
554    }
555}
556
557impl Default for CachedPathEvaluator {
558    fn default() -> Self {
559        Self::new()
560    }
561}
562
563/// Path reachability index for fast lookups
564pub struct ReachabilityIndex {
565    /// Forward reachability: node -> set of reachable nodes
566    forward: DashMap<String, HashSet<String>>,
567    /// Backward reachability: node -> set of nodes that can reach it
568    backward: DashMap<String, HashSet<String>>,
569    /// Transitive closure computed
570    computed: std::sync::atomic::AtomicBool,
571}
572
573impl ReachabilityIndex {
574    /// Create new reachability index
575    pub fn new() -> Self {
576        Self {
577            forward: DashMap::new(),
578            backward: DashMap::new(),
579            computed: std::sync::atomic::AtomicBool::new(false),
580        }
581    }
582
583    /// Add edge to index
584    pub fn add_edge(&self, from: &Term, to: &Term) {
585        let from_key = format!("{:?}", from);
586        let to_key = format!("{:?}", to);
587
588        self.forward
589            .entry(from_key.clone())
590            .or_default()
591            .insert(to_key.clone());
592
593        self.backward.entry(to_key).or_default().insert(from_key);
594
595        // Mark as needing recomputation
596        self.computed
597            .store(false, std::sync::atomic::Ordering::Relaxed);
598    }
599
600    /// Check if path exists from start to end
601    pub fn is_reachable(&self, from: &Term, to: &Term) -> bool {
602        let from_key = format!("{:?}", from);
603        let to_key = format!("{:?}", to);
604
605        if let Some(reachable) = self.forward.get(&from_key) {
606            reachable.contains(&to_key)
607        } else {
608            false
609        }
610    }
611
612    /// Compute transitive closure (Floyd-Warshall style)
613    pub fn compute_transitive_closure(&self) {
614        // Simplified - real implementation would do full transitive closure
615        self.computed
616            .store(true, std::sync::atomic::Ordering::Relaxed);
617    }
618
619    /// Clear the index
620    pub fn clear(&self) {
621        self.forward.clear();
622        self.backward.clear();
623        self.computed
624            .store(false, std::sync::atomic::Ordering::Relaxed);
625    }
626}
627
628impl Default for ReachabilityIndex {
629    fn default() -> Self {
630        Self::new()
631    }
632}
633
634/// Cost-based property path optimizer for selecting the best evaluation strategy
635pub struct CostBasedPathOptimizer {
636    /// Path statistics for cost estimation
637    path_stats: Arc<DashMap<String, PathStatistics>>,
638    /// Configuration for cost-based optimization
639    config: PathOptimizationConfig,
640}
641
642/// Statistics for a property path
643#[derive(Debug, Clone)]
644pub struct PathStatistics {
645    /// Average result cardinality
646    pub avg_cardinality: f64,
647    /// Average evaluation time in microseconds
648    pub avg_eval_time_us: f64,
649    /// Number of times this path was evaluated
650    pub evaluation_count: u64,
651    /// Selectivity (ratio of results to candidates)
652    pub selectivity: f64,
653    /// Last update timestamp
654    pub last_updated: std::time::Instant,
655}
656
657impl Default for PathStatistics {
658    fn default() -> Self {
659        Self {
660            avg_cardinality: 100.0, // Default estimate
661            avg_eval_time_us: 1000.0,
662            evaluation_count: 0,
663            selectivity: 0.1,
664            last_updated: std::time::Instant::now(),
665        }
666    }
667}
668
669/// Configuration for path optimization
670#[derive(Debug, Clone)]
671pub struct PathOptimizationConfig {
672    /// Enable cost-based optimization
673    pub enabled: bool,
674    /// Minimum samples before using statistics
675    pub min_samples: u64,
676    /// Cost weight for cardinality (0.0 to 1.0)
677    pub cardinality_weight: f64,
678    /// Cost weight for evaluation time (0.0 to 1.0)
679    pub time_weight: f64,
680    /// Enable adaptive learning from execution
681    pub adaptive_learning: bool,
682    /// Learning rate for statistics updates (0.0 to 1.0)
683    pub learning_rate: f64,
684}
685
686impl Default for PathOptimizationConfig {
687    fn default() -> Self {
688        Self {
689            enabled: true,
690            min_samples: 10,
691            cardinality_weight: 0.6,
692            time_weight: 0.4,
693            adaptive_learning: true,
694            learning_rate: 0.1,
695        }
696    }
697}
698
699/// Path evaluation strategy
700#[derive(Debug, Clone, Copy, PartialEq, Eq)]
701pub enum PathEvaluationStrategy {
702    /// Forward BFS from start node
703    ForwardBFS,
704    /// Backward BFS from end node
705    BackwardBFS,
706    /// Bidirectional BFS (meet in the middle)
707    BidirectionalBFS,
708    /// Use precomputed reachability index
709    IndexLookup,
710    /// Direct evaluation without optimization
711    Direct,
712}
713
714/// Cost estimate for a path evaluation strategy
715#[derive(Debug, Clone)]
716pub struct StrategyCostEstimate {
717    /// Estimated cardinality
718    pub estimated_cardinality: f64,
719    /// Estimated evaluation time in microseconds
720    pub estimated_time_us: f64,
721    /// Total estimated cost (weighted combination)
722    pub total_cost: f64,
723    /// Recommended strategy
724    pub strategy: PathEvaluationStrategy,
725    /// Confidence level (0.0 to 1.0)
726    pub confidence: f64,
727}
728
729impl CostBasedPathOptimizer {
730    /// Create a new cost-based path optimizer
731    pub fn new() -> Self {
732        Self::with_config(PathOptimizationConfig::default())
733    }
734
735    /// Create with custom configuration
736    pub fn with_config(config: PathOptimizationConfig) -> Self {
737        Self {
738            path_stats: Arc::new(DashMap::new()),
739            config,
740        }
741    }
742
743    /// Estimate cost and select best evaluation strategy
744    pub fn select_strategy(
745        &self,
746        path: &PropertyPath,
747        start: Option<&Term>,
748        end: Option<&Term>,
749        dataset_size: usize,
750    ) -> StrategyCostEstimate {
751        if !self.config.enabled {
752            return self.default_strategy(path);
753        }
754
755        let path_key = self.path_signature(path);
756        let stats = self.get_or_create_stats(&path_key);
757
758        // Analyze path complexity
759        let complexity = PathAnalyzer::analyze_complexity(path);
760
761        // Estimate costs for different strategies
762        let forward_cost =
763            self.estimate_forward_cost(&complexity, &stats, start.is_some(), dataset_size);
764        let backward_cost =
765            self.estimate_backward_cost(&complexity, &stats, end.is_some(), dataset_size);
766        let bidirectional_cost = self.estimate_bidirectional_cost(
767            &complexity,
768            &stats,
769            start.is_some() && end.is_some(),
770            dataset_size,
771        );
772        let index_cost = self.estimate_index_cost(&complexity, &stats);
773
774        // Select strategy with minimum cost
775        let estimates = vec![
776            (PathEvaluationStrategy::ForwardBFS, forward_cost),
777            (PathEvaluationStrategy::BackwardBFS, backward_cost),
778            (PathEvaluationStrategy::BidirectionalBFS, bidirectional_cost),
779            (PathEvaluationStrategy::IndexLookup, index_cost),
780        ];
781
782        let best = estimates
783            .into_iter()
784            .min_by(|(_, cost1), (_, cost2)| {
785                cost1
786                    .total_cost
787                    .partial_cmp(&cost2.total_cost)
788                    .unwrap_or(std::cmp::Ordering::Equal)
789            })
790            .unwrap_or((
791                PathEvaluationStrategy::Direct,
792                StrategyCostEstimate {
793                    estimated_cardinality: stats.avg_cardinality,
794                    estimated_time_us: stats.avg_eval_time_us,
795                    total_cost: stats.avg_eval_time_us,
796                    strategy: PathEvaluationStrategy::Direct,
797                    confidence: 0.5,
798                },
799            ));
800
801        best.1
802    }
803
804    /// Record actual execution results for adaptive learning
805    pub fn record_execution(
806        &self,
807        path: &PropertyPath,
808        actual_cardinality: u64,
809        actual_time_us: u64,
810    ) {
811        if !self.config.adaptive_learning {
812            return;
813        }
814
815        let path_key = self.path_signature(path);
816        let mut stats = self.get_or_create_stats(&path_key);
817
818        // Update statistics using exponential moving average
819        let alpha = self.config.learning_rate;
820        stats.avg_cardinality =
821            (1.0 - alpha) * stats.avg_cardinality + alpha * (actual_cardinality as f64);
822        stats.avg_eval_time_us =
823            (1.0 - alpha) * stats.avg_eval_time_us + alpha * (actual_time_us as f64);
824        stats.evaluation_count += 1;
825
826        // Update selectivity (simplified)
827        if actual_cardinality > 0 {
828            let observed_selectivity = (actual_cardinality as f64) / 1000.0; // Normalize
829            stats.selectivity = (1.0 - alpha) * stats.selectivity + alpha * observed_selectivity;
830        }
831
832        stats.last_updated = std::time::Instant::now();
833
834        // Update in map
835        self.path_stats.insert(path_key, stats);
836    }
837
838    /// Get statistics for a path
839    pub fn get_statistics(&self, path: &PropertyPath) -> Option<PathStatistics> {
840        let path_key = self.path_signature(path);
841        self.path_stats.get(&path_key).map(|s| s.clone())
842    }
843
844    /// Clear all statistics
845    pub fn clear_statistics(&self) {
846        self.path_stats.clear();
847    }
848
849    // Private helper methods
850
851    fn path_signature(&self, path: &PropertyPath) -> String {
852        format!("{:?}", path)
853    }
854
855    fn get_or_create_stats(&self, path_key: &str) -> PathStatistics {
856        self.path_stats
857            .entry(path_key.to_string())
858            .or_default()
859            .clone()
860    }
861
862    fn default_strategy(&self, path: &PropertyPath) -> StrategyCostEstimate {
863        let complexity = PathAnalyzer::analyze_complexity(path);
864        StrategyCostEstimate {
865            estimated_cardinality: 100.0,
866            estimated_time_us: complexity.estimated_cost * 1000.0,
867            total_cost: complexity.estimated_cost * 1000.0,
868            strategy: if complexity.has_recursion {
869                PathEvaluationStrategy::BidirectionalBFS
870            } else {
871                PathEvaluationStrategy::ForwardBFS
872            },
873            confidence: 0.5,
874        }
875    }
876
877    fn estimate_forward_cost(
878        &self,
879        complexity: &PathComplexity,
880        stats: &PathStatistics,
881        has_start: bool,
882        dataset_size: usize,
883    ) -> StrategyCostEstimate {
884        let base_cost = if has_start {
885            stats.avg_eval_time_us
886        } else {
887            stats.avg_eval_time_us * (dataset_size as f64).sqrt()
888        };
889
890        let cardinality = stats.avg_cardinality;
891        let time_cost = base_cost * (complexity.depth as f64);
892
893        let total_cost =
894            self.config.cardinality_weight * cardinality + self.config.time_weight * time_cost;
895
896        let confidence = self.calculate_confidence(stats);
897
898        StrategyCostEstimate {
899            estimated_cardinality: cardinality,
900            estimated_time_us: time_cost,
901            total_cost,
902            strategy: PathEvaluationStrategy::ForwardBFS,
903            confidence,
904        }
905    }
906
907    fn estimate_backward_cost(
908        &self,
909        complexity: &PathComplexity,
910        stats: &PathStatistics,
911        has_end: bool,
912        dataset_size: usize,
913    ) -> StrategyCostEstimate {
914        let base_cost = if has_end {
915            stats.avg_eval_time_us
916        } else {
917            stats.avg_eval_time_us * (dataset_size as f64).sqrt()
918        };
919
920        let cardinality = stats.avg_cardinality;
921        let time_cost = base_cost * (complexity.depth as f64);
922
923        let total_cost =
924            self.config.cardinality_weight * cardinality + self.config.time_weight * time_cost;
925
926        let confidence = self.calculate_confidence(stats);
927
928        StrategyCostEstimate {
929            estimated_cardinality: cardinality,
930            estimated_time_us: time_cost,
931            total_cost,
932            strategy: PathEvaluationStrategy::BackwardBFS,
933            confidence,
934        }
935    }
936
937    fn estimate_bidirectional_cost(
938        &self,
939        complexity: &PathComplexity,
940        stats: &PathStatistics,
941        has_both: bool,
942        _dataset_size: usize,
943    ) -> StrategyCostEstimate {
944        if !has_both {
945            // Bidirectional only works when both start and end are bound
946            return StrategyCostEstimate {
947                estimated_cardinality: f64::INFINITY,
948                estimated_time_us: f64::INFINITY,
949                total_cost: f64::INFINITY,
950                strategy: PathEvaluationStrategy::BidirectionalBFS,
951                confidence: 0.0,
952            };
953        }
954
955        // Bidirectional search is typically faster for long paths
956        let speedup_factor = if complexity.depth > 3 { 0.5 } else { 0.8 };
957        let cardinality = stats.avg_cardinality;
958        let time_cost = stats.avg_eval_time_us * speedup_factor;
959
960        let total_cost =
961            self.config.cardinality_weight * cardinality + self.config.time_weight * time_cost;
962
963        let confidence = self.calculate_confidence(stats);
964
965        StrategyCostEstimate {
966            estimated_cardinality: cardinality,
967            estimated_time_us: time_cost,
968            total_cost,
969            strategy: PathEvaluationStrategy::BidirectionalBFS,
970            confidence,
971        }
972    }
973
974    fn estimate_index_cost(
975        &self,
976        complexity: &PathComplexity,
977        stats: &PathStatistics,
978    ) -> StrategyCostEstimate {
979        // Index lookup is very fast for simple paths
980        let is_simple = !complexity.has_recursion && complexity.depth <= 2;
981
982        let (time_cost, cardinality) = if is_simple {
983            (10.0, stats.avg_cardinality) // Constant time lookup
984        } else {
985            (f64::INFINITY, f64::INFINITY) // Not suitable for complex paths
986        };
987
988        let total_cost = if is_simple {
989            self.config.time_weight * time_cost
990        } else {
991            f64::INFINITY
992        };
993
994        StrategyCostEstimate {
995            estimated_cardinality: cardinality,
996            estimated_time_us: time_cost,
997            total_cost,
998            strategy: PathEvaluationStrategy::IndexLookup,
999            confidence: if is_simple { 0.9 } else { 0.0 },
1000        }
1001    }
1002
1003    fn calculate_confidence(&self, stats: &PathStatistics) -> f64 {
1004        if stats.evaluation_count < self.config.min_samples {
1005            (stats.evaluation_count as f64) / (self.config.min_samples as f64)
1006        } else {
1007            0.95 // High confidence after sufficient samples
1008        }
1009    }
1010}
1011
1012impl Default for CostBasedPathOptimizer {
1013    fn default() -> Self {
1014        Self::new()
1015    }
1016}
1017
1018#[cfg(test)]
1019mod tests {
1020    use super::*;
1021    use oxirs_core::model::NamedNode;
1022
1023    fn create_test_term(iri: &str) -> Term {
1024        Term::Iri(NamedNode::new(iri).unwrap())
1025    }
1026
1027    #[test]
1028    fn test_path_cache() {
1029        let cache = PathCache::new();
1030        let path = PropertyPath::Direct(create_test_term("http://example.org/knows"));
1031        let start = create_test_term("http://example.org/alice");
1032        let end = create_test_term("http://example.org/bob");
1033
1034        // Initially not in cache
1035        assert!(cache.get(&path, &start, Some(&end)).is_none());
1036
1037        // Insert into cache
1038        cache.insert(&path, &start, Some(&end), true);
1039
1040        // Now in cache
1041        assert_eq!(cache.get(&path, &start, Some(&end)), Some(true));
1042
1043        let stats = cache.stats();
1044        assert_eq!(stats.hits, 1);
1045        assert_eq!(stats.misses, 1);
1046    }
1047
1048    #[test]
1049    fn test_path_analyzer_complexity() {
1050        let direct = PropertyPath::Direct(create_test_term("http://example.org/knows"));
1051        let complexity = PathAnalyzer::analyze_complexity(&direct);
1052        assert_eq!(complexity.depth, 1);
1053        assert!(!complexity.has_recursion);
1054
1055        let recursive = PropertyPath::ZeroOrMore(Box::new(direct.clone()));
1056        let complexity = PathAnalyzer::analyze_complexity(&recursive);
1057        assert!(complexity.has_recursion);
1058        assert!(complexity.estimated_cost > 10.0);
1059    }
1060
1061    #[test]
1062    fn test_path_analyzer_predicates() {
1063        let knows = create_test_term("http://example.org/knows");
1064        let likes = create_test_term("http://example.org/likes");
1065
1066        let path = PropertyPath::Alternative(
1067            Box::new(PropertyPath::Direct(knows.clone())),
1068            Box::new(PropertyPath::Direct(likes.clone())),
1069        );
1070
1071        let predicates = PathAnalyzer::extract_predicates(&path);
1072        assert_eq!(predicates.len(), 2);
1073        assert!(predicates.contains(&knows));
1074        assert!(predicates.contains(&likes));
1075    }
1076
1077    #[test]
1078    fn test_path_optimization_hints() {
1079        let deep_recursive =
1080            PropertyPath::ZeroOrMore(Box::new(PropertyPath::ZeroOrMore(Box::new(
1081                PropertyPath::Direct(create_test_term("http://example.org/part")),
1082            ))));
1083
1084        let hints = PathAnalyzer::suggest_optimization(&deep_recursive);
1085        assert!(!hints.is_empty());
1086    }
1087
1088    #[test]
1089    fn test_cached_evaluator() {
1090        let evaluator = CachedPathEvaluator::new();
1091        let path = PropertyPath::Direct(create_test_term("http://example.org/knows"));
1092        let start = create_test_term("http://example.org/alice");
1093        let end = create_test_term("http://example.org/bob");
1094
1095        let mut dataset = HashMap::new();
1096        dataset.insert(
1097            start.clone(),
1098            vec![(create_test_term("http://example.org/knows"), end.clone())],
1099        );
1100
1101        let result = evaluator.evaluate(&path, &start, Some(&end), &dataset);
1102        assert!(result.is_ok());
1103
1104        // Check cache hit
1105        let stats = evaluator.cache_stats();
1106        assert_eq!(stats.size, 1);
1107    }
1108
1109    #[test]
1110    fn test_reachability_index() {
1111        let index = ReachabilityIndex::new();
1112        let alice = create_test_term("http://example.org/alice");
1113        let bob = create_test_term("http://example.org/bob");
1114        let carol = create_test_term("http://example.org/carol");
1115
1116        index.add_edge(&alice, &bob);
1117        index.add_edge(&bob, &carol);
1118
1119        assert!(index.is_reachable(&alice, &bob));
1120        assert!(!index.is_reachable(&alice, &carol)); // Not computed yet
1121
1122        index.compute_transitive_closure();
1123    }
1124
1125    #[test]
1126    fn test_path_complexity_sequence() {
1127        let p1 = PropertyPath::Direct(create_test_term("http://example.org/p1"));
1128        let p2 = PropertyPath::Direct(create_test_term("http://example.org/p2"));
1129        let seq = PropertyPath::Sequence(Box::new(p1), Box::new(p2));
1130
1131        let complexity = PathAnalyzer::analyze_complexity(&seq);
1132        assert_eq!(complexity.depth, 2);
1133    }
1134
1135    #[test]
1136    fn test_path_determinism() {
1137        let direct = PropertyPath::Direct(create_test_term("http://example.org/knows"));
1138        assert!(PathAnalyzer::is_deterministic(&direct));
1139
1140        let recursive = PropertyPath::ZeroOrMore(Box::new(direct));
1141        assert!(PathAnalyzer::is_deterministic(&recursive));
1142    }
1143
1144    #[test]
1145    #[ignore = "Slow test - eviction logic needs optimization"]
1146    fn test_cache_eviction() {
1147        let cache = PathCache::with_capacity(10);
1148        let path = PropertyPath::Direct(create_test_term("http://example.org/p"));
1149
1150        // Insert 15 entries
1151        for i in 0..15 {
1152            let term = create_test_term(&format!("http://example.org/node{}", i));
1153            cache.insert(&path, &term, None, true);
1154        }
1155
1156        // Should have evicted some entries
1157        let stats = cache.stats();
1158        assert!(stats.size <= 10);
1159    }
1160
1161    #[test]
1162    fn test_cost_based_optimizer_default() {
1163        let optimizer = CostBasedPathOptimizer::new();
1164        let path = PropertyPath::Direct(create_test_term("http://example.org/knows"));
1165
1166        let start = create_test_term("http://example.org/alice");
1167        let end = create_test_term("http://example.org/bob");
1168
1169        let estimate = optimizer.select_strategy(&path, Some(&start), Some(&end), 1000);
1170
1171        assert!(estimate.total_cost > 0.0);
1172        assert!(estimate.confidence >= 0.0 && estimate.confidence <= 1.0);
1173    }
1174
1175    #[test]
1176    fn test_cost_based_optimizer_adaptive_learning() {
1177        let optimizer = CostBasedPathOptimizer::new();
1178        let path = PropertyPath::Direct(create_test_term("http://example.org/knows"));
1179
1180        // Record some executions
1181        optimizer.record_execution(&path, 50, 500);
1182        optimizer.record_execution(&path, 45, 480);
1183        optimizer.record_execution(&path, 55, 520);
1184
1185        // Get updated statistics
1186        let stats = optimizer.get_statistics(&path);
1187        assert!(stats.is_some());
1188
1189        let stats = stats.unwrap();
1190        assert_eq!(stats.evaluation_count, 3);
1191        assert!(stats.avg_cardinality > 0.0);
1192    }
1193
1194    #[test]
1195    fn test_cost_based_optimizer_strategy_selection() {
1196        let optimizer = CostBasedPathOptimizer::new();
1197
1198        // Simple path - should prefer forward or index
1199        let simple = PropertyPath::Direct(create_test_term("http://example.org/p"));
1200        let start = create_test_term("http://example.org/alice");
1201        let end = create_test_term("http://example.org/bob");
1202
1203        let estimate = optimizer.select_strategy(&simple, Some(&start), Some(&end), 100);
1204        assert!(matches!(
1205            estimate.strategy,
1206            PathEvaluationStrategy::ForwardBFS
1207                | PathEvaluationStrategy::BidirectionalBFS
1208                | PathEvaluationStrategy::IndexLookup
1209        ));
1210
1211        // Recursive path - should consider bidirectional
1212        let recursive = PropertyPath::ZeroOrMore(Box::new(simple));
1213        let estimate = optimizer.select_strategy(&recursive, Some(&start), Some(&end), 100);
1214        assert!(estimate.estimated_time_us > 0.0);
1215    }
1216
1217    #[test]
1218    fn test_path_statistics_confidence() {
1219        let optimizer = CostBasedPathOptimizer::with_config(PathOptimizationConfig {
1220            min_samples: 5,
1221            ..Default::default()
1222        });
1223
1224        let path = PropertyPath::Direct(create_test_term("http://example.org/p"));
1225
1226        // Low confidence with few samples
1227        optimizer.record_execution(&path, 10, 100);
1228        let stats = optimizer.get_statistics(&path).unwrap();
1229        let confidence = if stats.evaluation_count < 5 {
1230            (stats.evaluation_count as f64) / 5.0
1231        } else {
1232            0.95
1233        };
1234        assert!(confidence < 0.5);
1235
1236        // Higher confidence with more samples
1237        for _ in 0..6 {
1238            optimizer.record_execution(&path, 10, 100);
1239        }
1240        let stats = optimizer.get_statistics(&path).unwrap();
1241        assert!(stats.evaluation_count >= 5);
1242    }
1243
1244    #[test]
1245    fn test_bidirectional_strategy_requires_both_endpoints() {
1246        let optimizer = CostBasedPathOptimizer::new();
1247        let path = PropertyPath::Direct(create_test_term("http://example.org/p"));
1248        let start = create_test_term("http://example.org/alice");
1249
1250        // Without end node, bidirectional should have infinite cost
1251        let estimate = optimizer.select_strategy(&path, Some(&start), None, 100);
1252        // The selected strategy should not be bidirectional (or if it is, it has special handling)
1253        assert!(
1254            estimate.total_cost.is_finite()
1255                || estimate.strategy != PathEvaluationStrategy::BidirectionalBFS
1256        );
1257    }
1258
1259    #[test]
1260    fn test_cost_optimizer_clear_statistics() {
1261        let optimizer = CostBasedPathOptimizer::new();
1262        let path = PropertyPath::Direct(create_test_term("http://example.org/p"));
1263
1264        optimizer.record_execution(&path, 10, 100);
1265        assert!(optimizer.get_statistics(&path).is_some());
1266
1267        optimizer.clear_statistics();
1268        // After clearing, we should get default stats
1269        let stats = optimizer.get_statistics(&path);
1270        assert!(stats.is_none() || stats.unwrap().evaluation_count == 0);
1271    }
1272
1273    #[test]
1274    fn test_path_evaluation_strategies() {
1275        // Test that all strategies are distinct
1276        let strategies = [
1277            PathEvaluationStrategy::ForwardBFS,
1278            PathEvaluationStrategy::BackwardBFS,
1279            PathEvaluationStrategy::BidirectionalBFS,
1280            PathEvaluationStrategy::IndexLookup,
1281            PathEvaluationStrategy::Direct,
1282        ];
1283
1284        for (i, s1) in strategies.iter().enumerate() {
1285            for (j, s2) in strategies.iter().enumerate() {
1286                if i == j {
1287                    assert_eq!(s1, s2);
1288                } else {
1289                    assert_ne!(s1, s2);
1290                }
1291            }
1292        }
1293    }
1294}
oxirs_arq/path_extensions.rs

oxirs_arq/
path_extensions.rs