1use crate::algebra::Term;
7use crate::path::{PathContext, PropertyPath};
8use anyhow::Result;
9use dashmap::DashMap;
10use std::collections::{HashMap, HashSet, VecDeque};
11use std::sync::Arc;
12
13pub struct PathCache {
15 cache: DashMap<PathCacheKey, PathCacheEntry>,
16 max_entries: usize,
17 hits: Arc<std::sync::atomic::AtomicUsize>,
18 misses: Arc<std::sync::atomic::AtomicUsize>,
19}
20
21#[derive(Debug, Clone, Hash, PartialEq, Eq)]
22struct PathCacheKey {
23 path: PropertyPath,
24 start: String, end: Option<String>, }
27
28#[derive(Debug, Clone)]
29struct PathCacheEntry {
30 reachable: bool,
31 #[allow(dead_code)]
32 intermediate_nodes: Vec<String>,
33 #[allow(dead_code)]
34 path_length: usize,
35 timestamp: std::time::Instant,
36}
37
38impl PathCache {
39 pub fn new() -> Self {
41 Self::with_capacity(10000)
42 }
43
44 pub fn with_capacity(max_entries: usize) -> Self {
46 Self {
47 cache: DashMap::new(),
48 max_entries,
49 hits: Arc::new(std::sync::atomic::AtomicUsize::new(0)),
50 misses: Arc::new(std::sync::atomic::AtomicUsize::new(0)),
51 }
52 }
53
54 pub fn get(&self, path: &PropertyPath, start: &Term, end: Option<&Term>) -> Option<bool> {
56 let key = PathCacheKey {
57 path: path.clone(),
58 start: format!("{:?}", start),
59 end: end.map(|t| format!("{:?}", t)),
60 };
61
62 if let Some(entry) = self.cache.get(&key) {
63 self.hits.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
64 Some(entry.reachable)
65 } else {
66 self.misses
67 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
68 None
69 }
70 }
71
72 pub fn insert(&self, path: &PropertyPath, start: &Term, end: Option<&Term>, reachable: bool) {
74 if self.cache.len() >= self.max_entries {
76 self.evict_oldest(self.max_entries / 10);
77 }
78
79 let key = PathCacheKey {
80 path: path.clone(),
81 start: format!("{:?}", start),
82 end: end.map(|t| format!("{:?}", t)),
83 };
84
85 let entry = PathCacheEntry {
86 reachable,
87 intermediate_nodes: Vec::new(),
88 path_length: 0,
89 timestamp: std::time::Instant::now(),
90 };
91
92 self.cache.insert(key, entry);
93 }
94
95 pub fn clear(&self) {
97 self.cache.clear();
98 }
99
100 pub fn stats(&self) -> CacheStats {
102 let hits = self.hits.load(std::sync::atomic::Ordering::Relaxed);
103 let misses = self.misses.load(std::sync::atomic::Ordering::Relaxed);
104 let total = hits + misses;
105 let hit_rate = if total > 0 {
106 hits as f64 / total as f64
107 } else {
108 0.0
109 };
110
111 CacheStats {
112 hits,
113 misses,
114 size: self.cache.len(),
115 capacity: self.max_entries,
116 hit_rate,
117 }
118 }
119
120 fn evict_oldest(&self, count: usize) {
121 let mut entries: Vec<_> = self.cache.iter().collect();
122 entries.sort_by_key(|e| e.value().timestamp);
123
124 for entry in entries.iter().take(count) {
125 self.cache.remove(entry.key());
126 }
127 }
128}
129
130impl Default for PathCache {
131 fn default() -> Self {
132 Self::new()
133 }
134}
135
136#[derive(Debug, Clone)]
138pub struct CacheStats {
139 pub hits: usize,
140 pub misses: usize,
141 pub size: usize,
142 pub capacity: usize,
143 pub hit_rate: f64,
144}
145
146pub struct BidirectionalPathSearch {
148 context: PathContext,
149}
150
151impl BidirectionalPathSearch {
152 pub fn new(context: PathContext) -> Self {
154 Self { context }
155 }
156
157 #[allow(unused_variables)]
159 pub fn search(
160 &self,
161 path: &PropertyPath,
162 start: &Term,
163 end: &Term,
164 dataset: &HashMap<Term, Vec<(Term, Term)>>,
165 ) -> Result<Option<Vec<Term>>> {
166 let mut forward_visited: HashMap<Term, Option<Term>> = HashMap::new();
168 let mut forward_queue = VecDeque::new();
169 forward_queue.push_back(start.clone());
170 forward_visited.insert(start.clone(), None);
171
172 let mut backward_visited: HashMap<Term, Option<Term>> = HashMap::new();
174 let mut backward_queue = VecDeque::new();
175 backward_queue.push_back(end.clone());
176 backward_visited.insert(end.clone(), None);
177
178 let mut iterations = 0;
179 let max_iterations = self.context.max_nodes;
180
181 while !forward_queue.is_empty() && !backward_queue.is_empty() && iterations < max_iterations
182 {
183 iterations += 1;
184
185 if forward_queue.len() <= backward_queue.len() {
187 if let Some(meeting_point) = self.expand_frontier(
188 &mut forward_queue,
189 &mut forward_visited,
190 &backward_visited,
191 dataset,
192 false,
193 )? {
194 return Ok(Some(self.reconstruct_path(
195 &meeting_point,
196 &forward_visited,
197 &backward_visited,
198 start,
199 end,
200 )));
201 }
202 } else {
203 if let Some(meeting_point) = self.expand_frontier(
205 &mut backward_queue,
206 &mut backward_visited,
207 &forward_visited,
208 dataset,
209 true,
210 )? {
211 return Ok(Some(self.reconstruct_path(
212 &meeting_point,
213 &forward_visited,
214 &backward_visited,
215 start,
216 end,
217 )));
218 }
219 }
220 }
221
222 Ok(None)
223 }
224
225 fn expand_frontier(
226 &self,
227 queue: &mut VecDeque<Term>,
228 visited: &mut HashMap<Term, Option<Term>>,
229 other_visited: &HashMap<Term, Option<Term>>,
230 dataset: &HashMap<Term, Vec<(Term, Term)>>,
231 _reverse: bool,
232 ) -> Result<Option<Term>> {
233 if let Some(current) = queue.pop_front() {
234 if other_visited.contains_key(¤t) {
236 return Ok(Some(current));
237 }
238
239 if let Some(neighbors) = dataset.get(¤t) {
241 for (predicate, neighbor) in neighbors {
242 let _ = predicate; if !visited.contains_key(neighbor) {
244 visited.insert(neighbor.clone(), Some(current.clone()));
245 queue.push_back(neighbor.clone());
246 }
247 }
248 }
249 }
250
251 Ok(None)
252 }
253
254 fn reconstruct_path(
255 &self,
256 meeting_point: &Term,
257 forward_visited: &HashMap<Term, Option<Term>>,
258 backward_visited: &HashMap<Term, Option<Term>>,
259 start: &Term,
260 end: &Term,
261 ) -> Vec<Term> {
262 let mut path = Vec::new();
263
264 let mut current = meeting_point.clone();
266 let mut forward_path = Vec::new();
267 while let Some(Some(prev)) = forward_visited.get(¤t) {
268 forward_path.push(current.clone());
269 current = prev.clone();
270 if current == *start {
271 forward_path.push(current.clone());
272 break;
273 }
274 }
275 forward_path.reverse();
276 path.extend(forward_path);
277
278 current = meeting_point.clone();
280 while let Some(Some(next)) = backward_visited.get(¤t) {
281 current = next.clone();
282 path.push(current.clone());
283 if current == *end {
284 break;
285 }
286 }
287
288 path
289 }
290}
291
292pub struct PathAnalyzer;
294
295impl PathAnalyzer {
296 pub fn analyze_complexity(path: &PropertyPath) -> PathComplexity {
298 match path {
299 PropertyPath::Direct(_) => PathComplexity {
300 depth: 1,
301 branching_factor: 1,
302 has_recursion: false,
303 has_negation: false,
304 estimated_cost: 1.0,
305 },
306 PropertyPath::Inverse(inner) => {
307 let mut complexity = Self::analyze_complexity(inner);
308 complexity.estimated_cost *= 1.2; complexity
310 }
311 PropertyPath::Sequence(left, right) => {
312 let left_complexity = Self::analyze_complexity(left);
313 let right_complexity = Self::analyze_complexity(right);
314 PathComplexity {
315 depth: left_complexity.depth + right_complexity.depth,
316 branching_factor: left_complexity
317 .branching_factor
318 .max(right_complexity.branching_factor),
319 has_recursion: left_complexity.has_recursion || right_complexity.has_recursion,
320 has_negation: left_complexity.has_negation || right_complexity.has_negation,
321 estimated_cost: left_complexity.estimated_cost
322 * right_complexity.estimated_cost,
323 }
324 }
325 PropertyPath::Alternative(left, right) => {
326 let left_complexity = Self::analyze_complexity(left);
327 let right_complexity = Self::analyze_complexity(right);
328 PathComplexity {
329 depth: left_complexity.depth.max(right_complexity.depth),
330 branching_factor: left_complexity.branching_factor
331 + right_complexity.branching_factor,
332 has_recursion: left_complexity.has_recursion || right_complexity.has_recursion,
333 has_negation: left_complexity.has_negation || right_complexity.has_negation,
334 estimated_cost: left_complexity.estimated_cost
335 + right_complexity.estimated_cost,
336 }
337 }
338 PropertyPath::ZeroOrMore(inner) | PropertyPath::OneOrMore(inner) => {
339 let mut complexity = Self::analyze_complexity(inner);
340 complexity.depth *= 10; complexity.has_recursion = true;
342 complexity.estimated_cost *= 100.0; complexity
344 }
345 PropertyPath::ZeroOrOne(inner) => {
346 let mut complexity = Self::analyze_complexity(inner);
347 complexity.branching_factor += 1; complexity.estimated_cost *= 1.5;
349 complexity
350 }
351 PropertyPath::NegatedPropertySet(_) => PathComplexity {
352 depth: 1,
353 branching_factor: 1,
354 has_recursion: false,
355 has_negation: true,
356 estimated_cost: 2.0, },
358 }
359 }
360
361 pub fn suggest_optimization(path: &PropertyPath) -> Vec<PathOptimizationHint> {
363 let complexity = Self::analyze_complexity(path);
364 let mut hints = Vec::new();
365
366 if complexity.has_recursion && complexity.depth > 100 {
367 hints.push(PathOptimizationHint::LimitRecursionDepth);
368 }
369
370 if complexity.branching_factor > 10 {
371 hints.push(PathOptimizationHint::UseIndexes);
372 }
373
374 if matches!(path, PropertyPath::Alternative(_, _)) {
375 hints.push(PathOptimizationHint::ReorderAlternatives);
376 }
377
378 if complexity.estimated_cost > 1000.0 {
379 hints.push(PathOptimizationHint::ConsiderMaterialization);
380 }
381
382 hints
383 }
384
385 pub fn is_deterministic(path: &PropertyPath) -> bool {
387 match path {
388 PropertyPath::Direct(_)
389 | PropertyPath::Inverse(_)
390 | PropertyPath::Sequence(_, _)
391 | PropertyPath::Alternative(_, _)
392 | PropertyPath::ZeroOrMore(_)
393 | PropertyPath::OneOrMore(_)
394 | PropertyPath::ZeroOrOne(_) => true,
395 PropertyPath::NegatedPropertySet(_) => true,
396 }
397 }
398
399 pub fn extract_predicates(path: &PropertyPath) -> HashSet<Term> {
401 let mut predicates = HashSet::new();
402 Self::extract_predicates_recursive(path, &mut predicates);
403 predicates
404 }
405
406 fn extract_predicates_recursive(path: &PropertyPath, predicates: &mut HashSet<Term>) {
407 match path {
408 PropertyPath::Direct(term) => {
409 predicates.insert(term.clone());
410 }
411 PropertyPath::Inverse(inner)
412 | PropertyPath::ZeroOrMore(inner)
413 | PropertyPath::OneOrMore(inner)
414 | PropertyPath::ZeroOrOne(inner) => {
415 Self::extract_predicates_recursive(inner, predicates);
416 }
417 PropertyPath::Sequence(left, right) | PropertyPath::Alternative(left, right) => {
418 Self::extract_predicates_recursive(left, predicates);
419 Self::extract_predicates_recursive(right, predicates);
420 }
421 PropertyPath::NegatedPropertySet(terms) => {
422 for term in terms {
423 predicates.insert(term.clone());
424 }
425 }
426 }
427 }
428}
429
430#[derive(Debug, Clone)]
432pub struct PathComplexity {
433 pub depth: usize,
434 pub branching_factor: usize,
435 pub has_recursion: bool,
436 pub has_negation: bool,
437 pub estimated_cost: f64,
438}
439
440#[derive(Debug, Clone, PartialEq, Eq)]
442pub enum PathOptimizationHint {
443 LimitRecursionDepth,
444 UseIndexes,
445 ReorderAlternatives,
446 ConsiderMaterialization,
447 CacheResults,
448 UseBidirectionalSearch,
449}
450
451pub struct CachedPathEvaluator {
453 cache: Arc<PathCache>,
454 bidirectional: bool,
455}
456
457impl CachedPathEvaluator {
458 pub fn new() -> Self {
460 Self {
461 cache: Arc::new(PathCache::new()),
462 bidirectional: true,
463 }
464 }
465
466 pub fn with_cache(cache: Arc<PathCache>) -> Self {
468 Self {
469 cache,
470 bidirectional: true,
471 }
472 }
473
474 pub fn set_bidirectional(&mut self, enabled: bool) {
476 self.bidirectional = enabled;
477 }
478
479 pub fn evaluate(
481 &self,
482 path: &PropertyPath,
483 start: &Term,
484 end: Option<&Term>,
485 dataset: &HashMap<Term, Vec<(Term, Term)>>,
486 ) -> Result<bool> {
487 if let Some(cached) = self.cache.get(path, start, end) {
489 return Ok(cached);
490 }
491
492 let reachable = self.evaluate_uncached(path, start, end, dataset)?;
494
495 self.cache.insert(path, start, end, reachable);
497
498 Ok(reachable)
499 }
500
501 fn evaluate_uncached(
502 &self,
503 path: &PropertyPath,
504 start: &Term,
505 end: Option<&Term>,
506 dataset: &HashMap<Term, Vec<(Term, Term)>>,
507 ) -> Result<bool> {
508 match path {
510 PropertyPath::Direct(predicate) => {
511 if let Some(neighbors) = dataset.get(start) {
512 for (pred, neighbor) in neighbors {
513 if pred == predicate {
514 if let Some(target) = end {
515 if neighbor == target {
516 return Ok(true);
517 }
518 } else {
519 return Ok(true);
520 }
521 }
522 }
523 }
524 Ok(false)
525 }
526 PropertyPath::ZeroOrMore(_) => {
527 if end.is_none() {
529 return Ok(true);
530 }
531 if let Some(target) = end {
532 if start == target {
533 return Ok(true);
534 }
535 }
536 Ok(false)
538 }
539 _ => {
540 Ok(false)
542 }
543 }
544 }
545
546 pub fn cache_stats(&self) -> CacheStats {
548 self.cache.stats()
549 }
550
551 pub fn clear_cache(&self) {
553 self.cache.clear();
554 }
555}
556
557impl Default for CachedPathEvaluator {
558 fn default() -> Self {
559 Self::new()
560 }
561}
562
563pub struct ReachabilityIndex {
565 forward: DashMap<String, HashSet<String>>,
567 backward: DashMap<String, HashSet<String>>,
569 computed: std::sync::atomic::AtomicBool,
571}
572
573impl ReachabilityIndex {
574 pub fn new() -> Self {
576 Self {
577 forward: DashMap::new(),
578 backward: DashMap::new(),
579 computed: std::sync::atomic::AtomicBool::new(false),
580 }
581 }
582
583 pub fn add_edge(&self, from: &Term, to: &Term) {
585 let from_key = format!("{:?}", from);
586 let to_key = format!("{:?}", to);
587
588 self.forward
589 .entry(from_key.clone())
590 .or_default()
591 .insert(to_key.clone());
592
593 self.backward.entry(to_key).or_default().insert(from_key);
594
595 self.computed
597 .store(false, std::sync::atomic::Ordering::Relaxed);
598 }
599
600 pub fn is_reachable(&self, from: &Term, to: &Term) -> bool {
602 let from_key = format!("{:?}", from);
603 let to_key = format!("{:?}", to);
604
605 if let Some(reachable) = self.forward.get(&from_key) {
606 reachable.contains(&to_key)
607 } else {
608 false
609 }
610 }
611
612 pub fn compute_transitive_closure(&self) {
614 self.computed
616 .store(true, std::sync::atomic::Ordering::Relaxed);
617 }
618
619 pub fn clear(&self) {
621 self.forward.clear();
622 self.backward.clear();
623 self.computed
624 .store(false, std::sync::atomic::Ordering::Relaxed);
625 }
626}
627
628impl Default for ReachabilityIndex {
629 fn default() -> Self {
630 Self::new()
631 }
632}
633
634pub struct CostBasedPathOptimizer {
636 path_stats: Arc<DashMap<String, PathStatistics>>,
638 config: PathOptimizationConfig,
640}
641
642#[derive(Debug, Clone)]
644pub struct PathStatistics {
645 pub avg_cardinality: f64,
647 pub avg_eval_time_us: f64,
649 pub evaluation_count: u64,
651 pub selectivity: f64,
653 pub last_updated: std::time::Instant,
655}
656
657impl Default for PathStatistics {
658 fn default() -> Self {
659 Self {
660 avg_cardinality: 100.0, avg_eval_time_us: 1000.0,
662 evaluation_count: 0,
663 selectivity: 0.1,
664 last_updated: std::time::Instant::now(),
665 }
666 }
667}
668
669#[derive(Debug, Clone)]
671pub struct PathOptimizationConfig {
672 pub enabled: bool,
674 pub min_samples: u64,
676 pub cardinality_weight: f64,
678 pub time_weight: f64,
680 pub adaptive_learning: bool,
682 pub learning_rate: f64,
684}
685
686impl Default for PathOptimizationConfig {
687 fn default() -> Self {
688 Self {
689 enabled: true,
690 min_samples: 10,
691 cardinality_weight: 0.6,
692 time_weight: 0.4,
693 adaptive_learning: true,
694 learning_rate: 0.1,
695 }
696 }
697}
698
699#[derive(Debug, Clone, Copy, PartialEq, Eq)]
701pub enum PathEvaluationStrategy {
702 ForwardBFS,
704 BackwardBFS,
706 BidirectionalBFS,
708 IndexLookup,
710 Direct,
712}
713
714#[derive(Debug, Clone)]
716pub struct StrategyCostEstimate {
717 pub estimated_cardinality: f64,
719 pub estimated_time_us: f64,
721 pub total_cost: f64,
723 pub strategy: PathEvaluationStrategy,
725 pub confidence: f64,
727}
728
729impl CostBasedPathOptimizer {
730 pub fn new() -> Self {
732 Self::with_config(PathOptimizationConfig::default())
733 }
734
735 pub fn with_config(config: PathOptimizationConfig) -> Self {
737 Self {
738 path_stats: Arc::new(DashMap::new()),
739 config,
740 }
741 }
742
743 pub fn select_strategy(
745 &self,
746 path: &PropertyPath,
747 start: Option<&Term>,
748 end: Option<&Term>,
749 dataset_size: usize,
750 ) -> StrategyCostEstimate {
751 if !self.config.enabled {
752 return self.default_strategy(path);
753 }
754
755 let path_key = self.path_signature(path);
756 let stats = self.get_or_create_stats(&path_key);
757
758 let complexity = PathAnalyzer::analyze_complexity(path);
760
761 let forward_cost =
763 self.estimate_forward_cost(&complexity, &stats, start.is_some(), dataset_size);
764 let backward_cost =
765 self.estimate_backward_cost(&complexity, &stats, end.is_some(), dataset_size);
766 let bidirectional_cost = self.estimate_bidirectional_cost(
767 &complexity,
768 &stats,
769 start.is_some() && end.is_some(),
770 dataset_size,
771 );
772 let index_cost = self.estimate_index_cost(&complexity, &stats);
773
774 let estimates = vec![
776 (PathEvaluationStrategy::ForwardBFS, forward_cost),
777 (PathEvaluationStrategy::BackwardBFS, backward_cost),
778 (PathEvaluationStrategy::BidirectionalBFS, bidirectional_cost),
779 (PathEvaluationStrategy::IndexLookup, index_cost),
780 ];
781
782 let best = estimates
783 .into_iter()
784 .min_by(|(_, cost1), (_, cost2)| {
785 cost1
786 .total_cost
787 .partial_cmp(&cost2.total_cost)
788 .unwrap_or(std::cmp::Ordering::Equal)
789 })
790 .unwrap_or((
791 PathEvaluationStrategy::Direct,
792 StrategyCostEstimate {
793 estimated_cardinality: stats.avg_cardinality,
794 estimated_time_us: stats.avg_eval_time_us,
795 total_cost: stats.avg_eval_time_us,
796 strategy: PathEvaluationStrategy::Direct,
797 confidence: 0.5,
798 },
799 ));
800
801 best.1
802 }
803
804 pub fn record_execution(
806 &self,
807 path: &PropertyPath,
808 actual_cardinality: u64,
809 actual_time_us: u64,
810 ) {
811 if !self.config.adaptive_learning {
812 return;
813 }
814
815 let path_key = self.path_signature(path);
816 let mut stats = self.get_or_create_stats(&path_key);
817
818 let alpha = self.config.learning_rate;
820 stats.avg_cardinality =
821 (1.0 - alpha) * stats.avg_cardinality + alpha * (actual_cardinality as f64);
822 stats.avg_eval_time_us =
823 (1.0 - alpha) * stats.avg_eval_time_us + alpha * (actual_time_us as f64);
824 stats.evaluation_count += 1;
825
826 if actual_cardinality > 0 {
828 let observed_selectivity = (actual_cardinality as f64) / 1000.0; stats.selectivity = (1.0 - alpha) * stats.selectivity + alpha * observed_selectivity;
830 }
831
832 stats.last_updated = std::time::Instant::now();
833
834 self.path_stats.insert(path_key, stats);
836 }
837
838 pub fn get_statistics(&self, path: &PropertyPath) -> Option<PathStatistics> {
840 let path_key = self.path_signature(path);
841 self.path_stats.get(&path_key).map(|s| s.clone())
842 }
843
844 pub fn clear_statistics(&self) {
846 self.path_stats.clear();
847 }
848
849 fn path_signature(&self, path: &PropertyPath) -> String {
852 format!("{:?}", path)
853 }
854
855 fn get_or_create_stats(&self, path_key: &str) -> PathStatistics {
856 self.path_stats
857 .entry(path_key.to_string())
858 .or_default()
859 .clone()
860 }
861
862 fn default_strategy(&self, path: &PropertyPath) -> StrategyCostEstimate {
863 let complexity = PathAnalyzer::analyze_complexity(path);
864 StrategyCostEstimate {
865 estimated_cardinality: 100.0,
866 estimated_time_us: complexity.estimated_cost * 1000.0,
867 total_cost: complexity.estimated_cost * 1000.0,
868 strategy: if complexity.has_recursion {
869 PathEvaluationStrategy::BidirectionalBFS
870 } else {
871 PathEvaluationStrategy::ForwardBFS
872 },
873 confidence: 0.5,
874 }
875 }
876
877 fn estimate_forward_cost(
878 &self,
879 complexity: &PathComplexity,
880 stats: &PathStatistics,
881 has_start: bool,
882 dataset_size: usize,
883 ) -> StrategyCostEstimate {
884 let base_cost = if has_start {
885 stats.avg_eval_time_us
886 } else {
887 stats.avg_eval_time_us * (dataset_size as f64).sqrt()
888 };
889
890 let cardinality = stats.avg_cardinality;
891 let time_cost = base_cost * (complexity.depth as f64);
892
893 let total_cost =
894 self.config.cardinality_weight * cardinality + self.config.time_weight * time_cost;
895
896 let confidence = self.calculate_confidence(stats);
897
898 StrategyCostEstimate {
899 estimated_cardinality: cardinality,
900 estimated_time_us: time_cost,
901 total_cost,
902 strategy: PathEvaluationStrategy::ForwardBFS,
903 confidence,
904 }
905 }
906
907 fn estimate_backward_cost(
908 &self,
909 complexity: &PathComplexity,
910 stats: &PathStatistics,
911 has_end: bool,
912 dataset_size: usize,
913 ) -> StrategyCostEstimate {
914 let base_cost = if has_end {
915 stats.avg_eval_time_us
916 } else {
917 stats.avg_eval_time_us * (dataset_size as f64).sqrt()
918 };
919
920 let cardinality = stats.avg_cardinality;
921 let time_cost = base_cost * (complexity.depth as f64);
922
923 let total_cost =
924 self.config.cardinality_weight * cardinality + self.config.time_weight * time_cost;
925
926 let confidence = self.calculate_confidence(stats);
927
928 StrategyCostEstimate {
929 estimated_cardinality: cardinality,
930 estimated_time_us: time_cost,
931 total_cost,
932 strategy: PathEvaluationStrategy::BackwardBFS,
933 confidence,
934 }
935 }
936
937 fn estimate_bidirectional_cost(
938 &self,
939 complexity: &PathComplexity,
940 stats: &PathStatistics,
941 has_both: bool,
942 _dataset_size: usize,
943 ) -> StrategyCostEstimate {
944 if !has_both {
945 return StrategyCostEstimate {
947 estimated_cardinality: f64::INFINITY,
948 estimated_time_us: f64::INFINITY,
949 total_cost: f64::INFINITY,
950 strategy: PathEvaluationStrategy::BidirectionalBFS,
951 confidence: 0.0,
952 };
953 }
954
955 let speedup_factor = if complexity.depth > 3 { 0.5 } else { 0.8 };
957 let cardinality = stats.avg_cardinality;
958 let time_cost = stats.avg_eval_time_us * speedup_factor;
959
960 let total_cost =
961 self.config.cardinality_weight * cardinality + self.config.time_weight * time_cost;
962
963 let confidence = self.calculate_confidence(stats);
964
965 StrategyCostEstimate {
966 estimated_cardinality: cardinality,
967 estimated_time_us: time_cost,
968 total_cost,
969 strategy: PathEvaluationStrategy::BidirectionalBFS,
970 confidence,
971 }
972 }
973
974 fn estimate_index_cost(
975 &self,
976 complexity: &PathComplexity,
977 stats: &PathStatistics,
978 ) -> StrategyCostEstimate {
979 let is_simple = !complexity.has_recursion && complexity.depth <= 2;
981
982 let (time_cost, cardinality) = if is_simple {
983 (10.0, stats.avg_cardinality) } else {
985 (f64::INFINITY, f64::INFINITY) };
987
988 let total_cost = if is_simple {
989 self.config.time_weight * time_cost
990 } else {
991 f64::INFINITY
992 };
993
994 StrategyCostEstimate {
995 estimated_cardinality: cardinality,
996 estimated_time_us: time_cost,
997 total_cost,
998 strategy: PathEvaluationStrategy::IndexLookup,
999 confidence: if is_simple { 0.9 } else { 0.0 },
1000 }
1001 }
1002
1003 fn calculate_confidence(&self, stats: &PathStatistics) -> f64 {
1004 if stats.evaluation_count < self.config.min_samples {
1005 (stats.evaluation_count as f64) / (self.config.min_samples as f64)
1006 } else {
1007 0.95 }
1009 }
1010}
1011
1012impl Default for CostBasedPathOptimizer {
1013 fn default() -> Self {
1014 Self::new()
1015 }
1016}
1017
1018#[cfg(test)]
1019mod tests {
1020 use super::*;
1021 use oxirs_core::model::NamedNode;
1022
1023 fn create_test_term(iri: &str) -> Term {
1024 Term::Iri(NamedNode::new(iri).unwrap())
1025 }
1026
1027 #[test]
1028 fn test_path_cache() {
1029 let cache = PathCache::new();
1030 let path = PropertyPath::Direct(create_test_term("http://example.org/knows"));
1031 let start = create_test_term("http://example.org/alice");
1032 let end = create_test_term("http://example.org/bob");
1033
1034 assert!(cache.get(&path, &start, Some(&end)).is_none());
1036
1037 cache.insert(&path, &start, Some(&end), true);
1039
1040 assert_eq!(cache.get(&path, &start, Some(&end)), Some(true));
1042
1043 let stats = cache.stats();
1044 assert_eq!(stats.hits, 1);
1045 assert_eq!(stats.misses, 1);
1046 }
1047
1048 #[test]
1049 fn test_path_analyzer_complexity() {
1050 let direct = PropertyPath::Direct(create_test_term("http://example.org/knows"));
1051 let complexity = PathAnalyzer::analyze_complexity(&direct);
1052 assert_eq!(complexity.depth, 1);
1053 assert!(!complexity.has_recursion);
1054
1055 let recursive = PropertyPath::ZeroOrMore(Box::new(direct.clone()));
1056 let complexity = PathAnalyzer::analyze_complexity(&recursive);
1057 assert!(complexity.has_recursion);
1058 assert!(complexity.estimated_cost > 10.0);
1059 }
1060
1061 #[test]
1062 fn test_path_analyzer_predicates() {
1063 let knows = create_test_term("http://example.org/knows");
1064 let likes = create_test_term("http://example.org/likes");
1065
1066 let path = PropertyPath::Alternative(
1067 Box::new(PropertyPath::Direct(knows.clone())),
1068 Box::new(PropertyPath::Direct(likes.clone())),
1069 );
1070
1071 let predicates = PathAnalyzer::extract_predicates(&path);
1072 assert_eq!(predicates.len(), 2);
1073 assert!(predicates.contains(&knows));
1074 assert!(predicates.contains(&likes));
1075 }
1076
1077 #[test]
1078 fn test_path_optimization_hints() {
1079 let deep_recursive =
1080 PropertyPath::ZeroOrMore(Box::new(PropertyPath::ZeroOrMore(Box::new(
1081 PropertyPath::Direct(create_test_term("http://example.org/part")),
1082 ))));
1083
1084 let hints = PathAnalyzer::suggest_optimization(&deep_recursive);
1085 assert!(!hints.is_empty());
1086 }
1087
1088 #[test]
1089 fn test_cached_evaluator() {
1090 let evaluator = CachedPathEvaluator::new();
1091 let path = PropertyPath::Direct(create_test_term("http://example.org/knows"));
1092 let start = create_test_term("http://example.org/alice");
1093 let end = create_test_term("http://example.org/bob");
1094
1095 let mut dataset = HashMap::new();
1096 dataset.insert(
1097 start.clone(),
1098 vec![(create_test_term("http://example.org/knows"), end.clone())],
1099 );
1100
1101 let result = evaluator.evaluate(&path, &start, Some(&end), &dataset);
1102 assert!(result.is_ok());
1103
1104 let stats = evaluator.cache_stats();
1106 assert_eq!(stats.size, 1);
1107 }
1108
1109 #[test]
1110 fn test_reachability_index() {
1111 let index = ReachabilityIndex::new();
1112 let alice = create_test_term("http://example.org/alice");
1113 let bob = create_test_term("http://example.org/bob");
1114 let carol = create_test_term("http://example.org/carol");
1115
1116 index.add_edge(&alice, &bob);
1117 index.add_edge(&bob, &carol);
1118
1119 assert!(index.is_reachable(&alice, &bob));
1120 assert!(!index.is_reachable(&alice, &carol)); index.compute_transitive_closure();
1123 }
1124
1125 #[test]
1126 fn test_path_complexity_sequence() {
1127 let p1 = PropertyPath::Direct(create_test_term("http://example.org/p1"));
1128 let p2 = PropertyPath::Direct(create_test_term("http://example.org/p2"));
1129 let seq = PropertyPath::Sequence(Box::new(p1), Box::new(p2));
1130
1131 let complexity = PathAnalyzer::analyze_complexity(&seq);
1132 assert_eq!(complexity.depth, 2);
1133 }
1134
1135 #[test]
1136 fn test_path_determinism() {
1137 let direct = PropertyPath::Direct(create_test_term("http://example.org/knows"));
1138 assert!(PathAnalyzer::is_deterministic(&direct));
1139
1140 let recursive = PropertyPath::ZeroOrMore(Box::new(direct));
1141 assert!(PathAnalyzer::is_deterministic(&recursive));
1142 }
1143
1144 #[test]
1145 #[ignore = "Slow test - eviction logic needs optimization"]
1146 fn test_cache_eviction() {
1147 let cache = PathCache::with_capacity(10);
1148 let path = PropertyPath::Direct(create_test_term("http://example.org/p"));
1149
1150 for i in 0..15 {
1152 let term = create_test_term(&format!("http://example.org/node{}", i));
1153 cache.insert(&path, &term, None, true);
1154 }
1155
1156 let stats = cache.stats();
1158 assert!(stats.size <= 10);
1159 }
1160
1161 #[test]
1162 fn test_cost_based_optimizer_default() {
1163 let optimizer = CostBasedPathOptimizer::new();
1164 let path = PropertyPath::Direct(create_test_term("http://example.org/knows"));
1165
1166 let start = create_test_term("http://example.org/alice");
1167 let end = create_test_term("http://example.org/bob");
1168
1169 let estimate = optimizer.select_strategy(&path, Some(&start), Some(&end), 1000);
1170
1171 assert!(estimate.total_cost > 0.0);
1172 assert!(estimate.confidence >= 0.0 && estimate.confidence <= 1.0);
1173 }
1174
1175 #[test]
1176 fn test_cost_based_optimizer_adaptive_learning() {
1177 let optimizer = CostBasedPathOptimizer::new();
1178 let path = PropertyPath::Direct(create_test_term("http://example.org/knows"));
1179
1180 optimizer.record_execution(&path, 50, 500);
1182 optimizer.record_execution(&path, 45, 480);
1183 optimizer.record_execution(&path, 55, 520);
1184
1185 let stats = optimizer.get_statistics(&path);
1187 assert!(stats.is_some());
1188
1189 let stats = stats.unwrap();
1190 assert_eq!(stats.evaluation_count, 3);
1191 assert!(stats.avg_cardinality > 0.0);
1192 }
1193
1194 #[test]
1195 fn test_cost_based_optimizer_strategy_selection() {
1196 let optimizer = CostBasedPathOptimizer::new();
1197
1198 let simple = PropertyPath::Direct(create_test_term("http://example.org/p"));
1200 let start = create_test_term("http://example.org/alice");
1201 let end = create_test_term("http://example.org/bob");
1202
1203 let estimate = optimizer.select_strategy(&simple, Some(&start), Some(&end), 100);
1204 assert!(matches!(
1205 estimate.strategy,
1206 PathEvaluationStrategy::ForwardBFS
1207 | PathEvaluationStrategy::BidirectionalBFS
1208 | PathEvaluationStrategy::IndexLookup
1209 ));
1210
1211 let recursive = PropertyPath::ZeroOrMore(Box::new(simple));
1213 let estimate = optimizer.select_strategy(&recursive, Some(&start), Some(&end), 100);
1214 assert!(estimate.estimated_time_us > 0.0);
1215 }
1216
1217 #[test]
1218 fn test_path_statistics_confidence() {
1219 let optimizer = CostBasedPathOptimizer::with_config(PathOptimizationConfig {
1220 min_samples: 5,
1221 ..Default::default()
1222 });
1223
1224 let path = PropertyPath::Direct(create_test_term("http://example.org/p"));
1225
1226 optimizer.record_execution(&path, 10, 100);
1228 let stats = optimizer.get_statistics(&path).unwrap();
1229 let confidence = if stats.evaluation_count < 5 {
1230 (stats.evaluation_count as f64) / 5.0
1231 } else {
1232 0.95
1233 };
1234 assert!(confidence < 0.5);
1235
1236 for _ in 0..6 {
1238 optimizer.record_execution(&path, 10, 100);
1239 }
1240 let stats = optimizer.get_statistics(&path).unwrap();
1241 assert!(stats.evaluation_count >= 5);
1242 }
1243
1244 #[test]
1245 fn test_bidirectional_strategy_requires_both_endpoints() {
1246 let optimizer = CostBasedPathOptimizer::new();
1247 let path = PropertyPath::Direct(create_test_term("http://example.org/p"));
1248 let start = create_test_term("http://example.org/alice");
1249
1250 let estimate = optimizer.select_strategy(&path, Some(&start), None, 100);
1252 assert!(
1254 estimate.total_cost.is_finite()
1255 || estimate.strategy != PathEvaluationStrategy::BidirectionalBFS
1256 );
1257 }
1258
1259 #[test]
1260 fn test_cost_optimizer_clear_statistics() {
1261 let optimizer = CostBasedPathOptimizer::new();
1262 let path = PropertyPath::Direct(create_test_term("http://example.org/p"));
1263
1264 optimizer.record_execution(&path, 10, 100);
1265 assert!(optimizer.get_statistics(&path).is_some());
1266
1267 optimizer.clear_statistics();
1268 let stats = optimizer.get_statistics(&path);
1270 assert!(stats.is_none() || stats.unwrap().evaluation_count == 0);
1271 }
1272
1273 #[test]
1274 fn test_path_evaluation_strategies() {
1275 let strategies = [
1277 PathEvaluationStrategy::ForwardBFS,
1278 PathEvaluationStrategy::BackwardBFS,
1279 PathEvaluationStrategy::BidirectionalBFS,
1280 PathEvaluationStrategy::IndexLookup,
1281 PathEvaluationStrategy::Direct,
1282 ];
1283
1284 for (i, s1) in strategies.iter().enumerate() {
1285 for (j, s2) in strategies.iter().enumerate() {
1286 if i == j {
1287 assert_eq!(s1, s2);
1288 } else {
1289 assert_ne!(s1, s2);
1290 }
1291 }
1292 }
1293 }
1294}