Skip to main content

oxirs_core/query/
profiled_plan_builder.rs

1//! Profiled Query Plan Builder
2//!
3//! This module bridges the query profiler and query plan visualizer,
4//! automatically generating visual query plans from profiling data.
5//!
6//! # Features
7//! - Automatic query plan generation from profiling sessions
8//! - Real execution statistics overlay
9//! - Performance bottleneck highlighting
10//! - Optimization recommendations based on actual execution
11//!
12//! # Example
//! ```rust,ignore
//! use oxirs_core::query::profiled_plan_builder::ProfiledPlanBuilder;
//! use oxirs_core::query::query_plan_visualizer::QueryPlanVisualizer;
//! use oxirs_core::query::query_profiler::{QueryProfiler, ProfilerConfig};
//!
//! let profiler = QueryProfiler::new(ProfilerConfig::default());
//! let session = profiler.start_session("SELECT ?s ?p ?o WHERE { ?s ?p ?o }");
//! // ... execute query ...
//! let stats = session.finish();
//!
//! let builder = ProfiledPlanBuilder::new();
//! let plan = builder.build_from_stats(&stats, "SELECT query");
//! let visualizer = QueryPlanVisualizer::new();
//! println!("{}", visualizer.visualize_as_tree(&plan));
//! ```
27
28use crate::query::query_plan_visualizer::{
29    HintSeverity, OptimizationHint, QueryPlanNode, QueryPlanVisualizer,
30};
31use crate::query::query_profiler::QueryStatistics;
32
33/// Builder for creating query plans from profiling data
34pub struct ProfiledPlanBuilder {
35    /// Visualizer for rendering plans
36    visualizer: QueryPlanVisualizer,
37    /// Whether to include optimization hints
38    include_hints: bool,
39}
40
41impl Default for ProfiledPlanBuilder {
42    fn default() -> Self {
43        Self::new()
44    }
45}
46
47impl ProfiledPlanBuilder {
48    /// Create a new profiled plan builder
49    pub fn new() -> Self {
50        Self {
51            visualizer: QueryPlanVisualizer::new(),
52            include_hints: true,
53        }
54    }
55
56    /// Enable or disable optimization hints
57    pub fn with_hints(mut self, enable: bool) -> Self {
58        self.include_hints = enable;
59        self
60    }
61
62    /// Build a query plan from profiling statistics
63    pub fn build_from_stats(&self, stats: &QueryStatistics, query_text: &str) -> QueryPlanNode {
64        let mut root = self.create_root_node(query_text, stats);
65
66        // Add parsing phase
67        if stats.parse_time_ms > 0 {
68            let parse_node = QueryPlanNode::new("Parse", "Query parsing")
69                .with_execution_time(stats.parse_time_ms * 1000) // Convert to μs
70                .with_metadata("phase", "parsing");
71            root.add_child(parse_node);
72        }
73
74        // Add planning phase
75        if stats.planning_time_ms > 0 {
76            let plan_node = QueryPlanNode::new("Planning", "Query optimization")
77                .with_execution_time(stats.planning_time_ms * 1000)
78                .with_metadata("phase", "planning");
79            root.add_child(plan_node);
80        }
81
82        // Add execution phase with pattern details
83        let mut exec_node = QueryPlanNode::new("Execution", "Query execution")
84            .with_execution_time(stats.execution_time_ms * 1000)
85            .with_actual_cardinality(stats.results_count as usize)
86            .with_metadata("phase", "execution");
87
88        // Add pattern matching details
89        for (pattern, count) in &stats.pattern_matches {
90            let pattern_node = QueryPlanNode::new("TriplePattern", pattern)
91                .with_actual_cardinality(*count as usize)
92                .with_metadata("matches", count.to_string());
93            exec_node.add_child(pattern_node);
94        }
95
96        // Add join operations
97        if stats.join_operations > 0 {
98            let join_node =
99                QueryPlanNode::new("Join", format!("{} join operations", stats.join_operations))
100                    .with_metadata("count", stats.join_operations.to_string());
101            exec_node.add_child(join_node);
102        }
103
104        // Add index usage
105        for (index, count) in &stats.index_accesses {
106            let index_node = QueryPlanNode::new("IndexScan", format!("Index: {}", index))
107                .with_actual_cardinality(*count as usize)
108                .with_index(index.clone())
109                .with_metadata("accesses", count.to_string());
110            exec_node.add_child(index_node);
111        }
112
113        root.add_child(exec_node);
114        root
115    }
116
117    /// Create root node with overall statistics
118    fn create_root_node(&self, query_text: &str, stats: &QueryStatistics) -> QueryPlanNode {
119        let description = if query_text.len() > 60 {
120            format!("{}...", &query_text[..57])
121        } else {
122            query_text.to_string()
123        };
124
125        QueryPlanNode::new("Query", description)
126            .with_execution_time(stats.total_time_ms * 1000) // Convert to μs
127            .with_actual_cardinality(stats.results_count as usize)
128            .with_metadata("total_triples", stats.triples_matched.to_string())
129            .with_metadata(
130                "cache_hit_rate",
131                format!("{:.1}%", stats.cache_hit_rate * 100.0),
132            )
133            .with_metadata(
134                "memory_peak",
135                format!("{}KB", stats.peak_memory_bytes / 1024),
136            )
137    }
138
139    /// Generate a complete profiling report with visualization
140    pub fn generate_report(&self, stats: &QueryStatistics, query_text: &str) -> ProfilingReport {
141        let plan = self.build_from_stats(stats, query_text);
142        let tree_visualization = self.visualizer.visualize_as_tree(&plan);
143        let summary = self.visualizer.generate_summary(&plan);
144
145        let hints = if self.include_hints {
146            self.visualizer.suggest_optimizations(&plan)
147        } else {
148            Vec::new()
149        };
150
151        ProfilingReport {
152            query: query_text.to_string(),
153            statistics: stats.clone(),
154            plan,
155            tree_visualization,
156            summary,
157            optimization_hints: hints,
158        }
159    }
160
161    /// Analyze query performance and generate recommendations
162    pub fn analyze_performance(&self, stats: &QueryStatistics) -> PerformanceAnalysis {
163        let mut analysis = PerformanceAnalysis {
164            is_slow: stats.total_time_ms > 1000,
165            slow_phases: Vec::new(),
166            inefficient_patterns: Vec::new(),
167            index_recommendations: Vec::new(),
168            cache_effectiveness: CacheEffectiveness::Unknown,
169            overall_grade: PerformanceGrade::Unknown,
170        };
171
172        // Analyze phases
173        if stats.parse_time_ms > stats.total_time_ms / 4 {
174            analysis
175                .slow_phases
176                .push(format!("Parsing is slow ({}ms)", stats.parse_time_ms));
177        }
178        if stats.planning_time_ms > stats.total_time_ms / 4 {
179            analysis
180                .slow_phases
181                .push(format!("Planning is slow ({}ms)", stats.planning_time_ms));
182        }
183        if stats.execution_time_ms > stats.total_time_ms / 2 {
184            analysis
185                .slow_phases
186                .push(format!("Execution is slow ({}ms)", stats.execution_time_ms));
187        }
188
189        // Analyze patterns
190        let total_matches: u64 = stats.pattern_matches.values().sum();
191        for (pattern, count) in &stats.pattern_matches {
192            if *count > 10000 {
193                analysis.inefficient_patterns.push(format!(
194                    "Pattern '{}' matched {} triples (consider adding selectivity)",
195                    pattern, count
196                ));
197            }
198        }
199
200        // Analyze index usage
201        if stats.index_accesses.is_empty() && total_matches > 1000 {
202            analysis.index_recommendations.push(
203                "No indexes used with large result set - consider adding indexes".to_string(),
204            );
205        }
206
207        // Analyze cache
208        analysis.cache_effectiveness = if stats.cache_hit_rate > 0.8 {
209            CacheEffectiveness::Excellent
210        } else if stats.cache_hit_rate > 0.5 {
211            CacheEffectiveness::Good
212        } else if stats.cache_hit_rate > 0.2 {
213            CacheEffectiveness::Fair
214        } else {
215            CacheEffectiveness::Poor
216        };
217
218        // Overall grade
219        analysis.overall_grade = self.calculate_grade(stats, &analysis);
220
221        analysis
222    }
223
224    /// Calculate overall performance grade
225    fn calculate_grade(
226        &self,
227        stats: &QueryStatistics,
228        analysis: &PerformanceAnalysis,
229    ) -> PerformanceGrade {
230        let mut score = 100.0;
231
232        // Penalize slow execution
233        if stats.total_time_ms > 5000 {
234            score -= 40.0;
235        } else if stats.total_time_ms > 1000 {
236            score -= 20.0;
237        } else if stats.total_time_ms > 100 {
238            score -= 5.0;
239        }
240
241        // Penalize inefficient patterns
242        score -= (analysis.inefficient_patterns.len() as f64 * 10.0).min(30.0);
243
244        // Penalize missing indexes
245        score -= (analysis.index_recommendations.len() as f64 * 15.0).min(20.0);
246
247        // Reward good cache usage
248        score += stats.cache_hit_rate as f64 * 10.0;
249
250        match score {
251            s if s >= 90.0 => PerformanceGrade::Excellent,
252            s if s >= 75.0 => PerformanceGrade::Good,
253            s if s >= 60.0 => PerformanceGrade::Fair,
254            s if s >= 40.0 => PerformanceGrade::Poor,
255            _ => PerformanceGrade::Critical,
256        }
257    }
258
259    /// Compare two profiling sessions
260    pub fn compare_executions(
261        &self,
262        baseline: &QueryStatistics,
263        current: &QueryStatistics,
264    ) -> ExecutionComparison {
265        let time_diff_pct = if baseline.total_time_ms > 0 {
266            ((current.total_time_ms as f64 - baseline.total_time_ms as f64)
267                / baseline.total_time_ms as f64)
268                * 100.0
269        } else {
270            0.0
271        };
272
273        let memory_diff_pct = if baseline.peak_memory_bytes > 0 {
274            ((current.peak_memory_bytes as f64 - baseline.peak_memory_bytes as f64)
275                / baseline.peak_memory_bytes as f64)
276                * 100.0
277        } else {
278            0.0
279        };
280
281        let improvement = if time_diff_pct < -5.0 {
282            ImprovementLevel::Significant
283        } else if time_diff_pct < 0.0 {
284            ImprovementLevel::Minor
285        } else if time_diff_pct < 5.0 {
286            ImprovementLevel::None
287        } else if time_diff_pct < 20.0 {
288            ImprovementLevel::Regression
289        } else {
290            ImprovementLevel::Critical
291        };
292
293        ExecutionComparison {
294            time_diff_ms: (current.total_time_ms as i64) - (baseline.total_time_ms as i64),
295            time_diff_pct,
296            memory_diff_bytes: (current.peak_memory_bytes as i64)
297                - (baseline.peak_memory_bytes as i64),
298            memory_diff_pct,
299            results_diff: (current.results_count as i64) - (baseline.results_count as i64),
300            cache_hit_diff: current.cache_hit_rate - baseline.cache_hit_rate,
301            improvement,
302        }
303    }
304}
305
306/// Complete profiling report with visualization
307#[derive(Debug)]
308pub struct ProfilingReport {
309    /// Original query text
310    pub query: String,
311    /// Profiling statistics
312    pub statistics: QueryStatistics,
313    /// Query plan
314    pub plan: QueryPlanNode,
315    /// ASCII tree visualization
316    pub tree_visualization: String,
317    /// Plan summary
318    pub summary: crate::query::query_plan_visualizer::QueryPlanSummary,
319    /// Optimization hints
320    pub optimization_hints: Vec<OptimizationHint>,
321}
322
323impl ProfilingReport {
324    /// Print a formatted report to stdout
325    pub fn print(&self) {
326        println!("=== Query Profiling Report ===\n");
327        println!("Query: {}\n", self.query);
328        println!("Execution Plan:\n{}", self.tree_visualization);
329        println!("\n{}", self.summary);
330
331        if !self.optimization_hints.is_empty() {
332            println!("\nOptimization Hints:");
333            println!("-------------------");
334            for hint in &self.optimization_hints {
335                let icon = match hint.severity {
336                    HintSeverity::Info => "ℹ️",
337                    HintSeverity::Warning => "⚠️",
338                    HintSeverity::Critical => "🔴",
339                };
340                println!("{} {}", icon, hint);
341            }
342        }
343    }
344}
345
346/// Performance analysis results
347#[derive(Debug)]
348pub struct PerformanceAnalysis {
349    /// Whether query is considered slow
350    pub is_slow: bool,
351    /// List of slow execution phases
352    pub slow_phases: Vec<String>,
353    /// Inefficient pattern descriptions
354    pub inefficient_patterns: Vec<String>,
355    /// Index recommendations
356    pub index_recommendations: Vec<String>,
357    /// Cache effectiveness rating
358    pub cache_effectiveness: CacheEffectiveness,
359    /// Overall performance grade
360    pub overall_grade: PerformanceGrade,
361}
362
/// Rating of how well the cache served a query, based on its hit rate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CacheEffectiveness {
    /// Hit rate above 80%.
    Excellent,
    /// Hit rate above 50%.
    Good,
    /// Hit rate above 20%.
    Fair,
    /// Hit rate of 20% or less.
    Poor,
    /// No rating has been computed.
    Unknown,
}
372
/// Overall performance grade derived from a 0-110 style score.
///
/// Variants are declared from worst to best so the derived ordering satisfies
/// `Critical < Poor < Fair < Good < Excellent`. Note that `Unknown` is declared
/// last and therefore compares greater than every real grade.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum PerformanceGrade {
    /// Score below 40 (lowest grade).
    Critical,
    /// Score of at least 40.
    Poor,
    /// Score of at least 60.
    Fair,
    /// Score of at least 75.
    Good,
    /// Score of at least 90 (highest grade).
    Excellent,
    /// No grade has been computed.
    Unknown,
}
383
384/// Comparison between two query executions
385#[derive(Debug)]
386pub struct ExecutionComparison {
387    /// Time difference in milliseconds
388    pub time_diff_ms: i64,
389    /// Time difference as percentage
390    pub time_diff_pct: f64,
391    /// Memory difference in bytes
392    pub memory_diff_bytes: i64,
393    /// Memory difference as percentage
394    pub memory_diff_pct: f64,
395    /// Results count difference
396    pub results_diff: i64,
397    /// Cache hit rate difference
398    pub cache_hit_diff: f32,
399    /// Overall improvement level
400    pub improvement: ImprovementLevel,
401}
402
/// Classification of the change in total time between two executions, as
/// assigned by `ProfiledPlanBuilder::compare_executions`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ImprovementLevel {
    /// More than 5% faster than the baseline.
    Significant,
    /// Faster than the baseline, but by at most 5%.
    Minor,
    /// Unchanged, or slower by less than 5%.
    None,
    /// Slower by at least 5% but less than 20%.
    Regression,
    /// Slower by 20% or more.
    Critical,
}
417
418impl std::fmt::Display for ExecutionComparison {
419    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
420        writeln!(f, "Execution Comparison:")?;
421        writeln!(
422            f,
423            "  Time:   {:+}ms ({:+.1}%)",
424            self.time_diff_ms, self.time_diff_pct
425        )?;
426        writeln!(
427            f,
428            "  Memory: {:+}KB ({:+.1}%)",
429            self.memory_diff_bytes / 1024,
430            self.memory_diff_pct
431        )?;
432        writeln!(f, "  Results: {:+}", self.results_diff)?;
433        writeln!(f, "  Cache:   {:+.1}%", self.cache_hit_diff * 100.0)?;
434        writeln!(f, "  Overall: {:?}", self.improvement)?;
435        Ok(())
436    }
437}
438
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Fixture: a 150 ms query with two 500-match patterns, one index and a
    /// 75% cache hit rate.
    fn create_sample_stats() -> QueryStatistics {
        QueryStatistics {
            total_time_ms: 150,
            parse_time_ms: 10,
            planning_time_ms: 20,
            execution_time_ms: 120,
            triples_matched: 1000,
            results_count: 50,
            peak_memory_bytes: 1024 * 1024, // 1 MiB
            join_operations: 2,
            cache_hit_rate: 0.75,
            pattern_matches: HashMap::from([
                ("?s rdf:type foaf:Person".to_string(), 500),
                ("?s foaf:name ?name".to_string(), 500),
            ]),
            index_accesses: HashMap::from([("SPO".to_string(), 2)]),
            ..Default::default()
        }
    }

    #[test]
    fn test_plan_builder_basic() {
        let query = "SELECT ?s ?name WHERE { ?s a foaf:Person . ?s foaf:name ?name }";
        let plan = ProfiledPlanBuilder::new().build_from_stats(&create_sample_stats(), query);

        // The root is the query node and carries the overall numbers.
        assert_eq!(plan.node_type, "Query");
        assert!(plan.execution_time_us.is_some());
        assert_eq!(plan.actual_cardinality, Some(50));

        // Phase nodes hang below the root.
        assert!(!plan.children.is_empty());
    }

    #[test]
    fn test_report_generation() {
        let query = "SELECT ?s ?p ?o WHERE { ?s ?p ?o }";
        let report = ProfiledPlanBuilder::new().generate_report(&create_sample_stats(), query);

        assert_eq!(report.query, "SELECT ?s ?p ?o WHERE { ?s ?p ?o }");
        assert!(!report.tree_visualization.is_empty());
        assert!(report.summary.total_nodes > 0);
    }

    #[test]
    fn test_performance_analysis() {
        let analysis = ProfiledPlanBuilder::new().analyze_performance(&create_sample_stats());

        // 150 ms is below the slow threshold; 75% hit rate rates as Good.
        assert!(!analysis.is_slow);
        assert_eq!(analysis.cache_effectiveness, CacheEffectiveness::Good);
    }

    #[test]
    fn test_slow_query_detection() {
        // Five seconds, a cold cache, no index usage and one very broad pattern.
        let mut stats = QueryStatistics {
            total_time_ms: 5000,
            cache_hit_rate: 0.1,
            ..create_sample_stats()
        };
        stats.index_accesses.clear();
        stats.pattern_matches.insert("?s ?p ?o".to_string(), 50000);

        let analysis = ProfiledPlanBuilder::new().analyze_performance(&stats);

        assert!(analysis.is_slow);
        // With stats this bad the grade must be Poor or Critical.
        assert!(matches!(
            analysis.overall_grade,
            PerformanceGrade::Poor | PerformanceGrade::Critical
        ));
    }

    #[test]
    fn test_execution_comparison() {
        let baseline = create_sample_stats();
        // 150 ms -> 100 ms, i.e. a third faster.
        let improved = QueryStatistics {
            total_time_ms: 100,
            ..baseline.clone()
        };

        let comparison = ProfiledPlanBuilder::new().compare_executions(&baseline, &improved);

        assert_eq!(comparison.time_diff_ms, -50);
        assert!(comparison.time_diff_pct < 0.0);
        assert_eq!(comparison.improvement, ImprovementLevel::Significant);
    }

    #[test]
    fn test_regression_detection() {
        let baseline = create_sample_stats();
        // 150 ms -> 200 ms, i.e. a third slower.
        let regressed = QueryStatistics {
            total_time_ms: 200,
            ..baseline.clone()
        };

        let comparison = ProfiledPlanBuilder::new().compare_executions(&baseline, &regressed);

        assert!(comparison.time_diff_ms > 0);
        assert!(comparison.time_diff_pct > 20.0);
        assert_eq!(comparison.improvement, ImprovementLevel::Critical);
    }

    #[test]
    fn test_cache_effectiveness() {
        let builder = ProfiledPlanBuilder::new();

        let hot_cache = QueryStatistics {
            cache_hit_rate: 0.9,
            ..create_sample_stats()
        };
        assert_eq!(
            builder.analyze_performance(&hot_cache).cache_effectiveness,
            CacheEffectiveness::Excellent
        );

        let cold_cache = QueryStatistics {
            cache_hit_rate: 0.1,
            ..create_sample_stats()
        };
        assert_eq!(
            builder.analyze_performance(&cold_cache).cache_effectiveness,
            CacheEffectiveness::Poor
        );
    }

    #[test]
    fn test_inefficient_pattern_detection() {
        let mut stats = create_sample_stats();
        // A fully unbound pattern that matches far more than the threshold.
        stats.pattern_matches.insert("?s ?p ?o".to_string(), 50000);

        let analysis = ProfiledPlanBuilder::new().analyze_performance(&stats);

        assert!(!analysis.inefficient_patterns.is_empty());
    }
}