codeprism_mcp/tools/workflow/optimization.rs

//! Workflow optimization and performance enhancement
//!
//! Provides intelligent optimization suggestions for analysis workflows
//! based on performance patterns, tool usage, and execution efficiency.

use anyhow::Result;
use serde_json::{json, Value};
use std::collections::HashMap;

use crate::context::session::SessionId;
use crate::tools::{CallToolResult, Tool, ToolContent};
use crate::CodePrismMcpServer;

/// Create the optimize_workflow tool
pub fn create_optimize_workflow_tool() -> Tool {
    Tool {
        name: "optimize_workflow".to_string(),
        title: Some("Optimize Workflow".to_string()),
        description: "Analyze and optimize analysis workflows for better performance and efficiency. Provides recommendations for tool sequencing, parallelization, and resource optimization.".to_string(),
        input_schema: json!({
            "type": "object",
            "properties": {
                "workflow_history": {
                    "type": "array",
                    "description": "History of tool calls to analyze",
                    "items": {
                        "type": "object",
                        "properties": {
                            "tool_name": {"type": "string"},
                            "execution_time_ms": {"type": "integer"},
                            "success": {"type": "boolean"},
                            "parameters": {"type": "object"},
                            "timestamp": {"type": "string"}
                        },
                        "required": ["tool_name", "execution_time_ms", "success"]
                    }
                },
                "session_id": {
                    "type": "string",
                    "description": "Session ID to analyze (optional)"
                },
                "optimization_goals": {
                    "type": "array",
                    "description": "Optimization objectives",
                    "items": {
                        "type": "string",
                        "enum": ["speed", "accuracy", "resource_usage", "parallelization", "user_experience"]
                    },
                    "default": ["speed", "user_experience"]
                },
                "target_performance": {
                    "type": "object",
                    "description": "Performance targets",
                    "properties": {
                        "max_total_time_minutes": {"type": "integer", "minimum": 1, "maximum": 60},
                        "max_parallel_tools": {"type": "integer", "minimum": 1, "maximum": 10},
                        "target_success_rate": {"type": "number", "minimum": 0.5, "maximum": 1.0}
                    }
                },
                "constraints": {
                    "type": "object",
                    "description": "Optimization constraints",
                    "properties": {
                        "preserve_accuracy": {"type": "boolean", "default": true},
                        "memory_limit_mb": {"type": "integer", "minimum": 100, "maximum": 2048},
                        "must_include_tools": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "Tools that must be included in optimized workflow"
                        },
                        "exclude_tools": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "Tools to exclude from optimization"
                        }
                    }
                }
            },
            "additionalProperties": false
        }),
    }
}
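
// Illustrative input only: a minimal argument object that satisfies the schema
// above. The tool names and timings are hypothetical examples, not values the
// server produces.
//
// {
//   "workflow_history": [
//     {"tool_name": "repository_stats", "execution_time_ms": 1800, "success": true},
//     {"tool_name": "analyze_complexity", "execution_time_ms": 12500, "success": true}
//   ],
//   "optimization_goals": ["speed"],
//   "target_performance": {"max_total_time_minutes": 5}
// }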

/// Optimize workflow based on history and goals
pub async fn optimize_workflow(
    _server: &CodePrismMcpServer,
    arguments: Option<&Value>,
) -> Result<CallToolResult> {
    let default_args = json!({});
    let args = arguments.unwrap_or(&default_args);

    let workflow_history = args.get("workflow_history").and_then(|v| v.as_array());

    let session_id = args
        .get("session_id")
        .and_then(|v| v.as_str())
        .map(|s| SessionId(s.to_string()));

    let optimization_goals = args
        .get("optimization_goals")
        .and_then(|v| v.as_array())
        .map(|arr| arr.iter().filter_map(|v| v.as_str()).collect::<Vec<_>>())
        .unwrap_or_else(|| vec!["speed", "user_experience"]);

    let target_performance = args.get("target_performance");
    let constraints = args.get("constraints");

    // Analyze workflow data
    let analysis_data = if let Some(history) = workflow_history {
        analyze_workflow_history(history)?
    } else if let Some(session_id) = session_id {
        analyze_session_workflow(session_id)?
    } else {
        return Err(anyhow::anyhow!(
            "Either workflow_history or session_id must be provided"
        ));
    };

    // Generate optimization recommendations
    let optimizations = generate_optimization_recommendations(
        &analysis_data,
        &optimization_goals,
        target_performance,
        constraints,
    )?;

    // Create optimization report
    let mut result = json!({
        "optimization_analysis": {
            "current_performance": analysis_data.performance_metrics,
            "identified_issues": analysis_data.issues,
            "optimization_potential": analysis_data.optimization_potential,
            "bottlenecks": analysis_data.bottlenecks
        },
        "optimization_recommendations": optimizations.recommendations,
        "optimized_workflow": {
            "tool_sequence": optimizations.optimized_sequence,
            "parallel_groups": optimizations.parallel_groups,
            "estimated_improvement": optimizations.estimated_improvement,
            "execution_strategy": optimizations.execution_strategy
        },
        "implementation_guide": {
            "quick_wins": optimizations.quick_wins,
            "advanced_optimizations": optimizations.advanced_optimizations,
            "migration_steps": optimizations.migration_steps,
            "testing_recommendations": optimizations.testing_recommendations
        }
    });

    // Add performance projections
    result["performance_projections"] = json!({
        "current_metrics": analysis_data.performance_metrics,
        "optimized_metrics": optimizations.projected_metrics,
        "improvement_summary": {
            "time_reduction_percent": optimizations.estimated_improvement.time_reduction * 100.0,
            "efficiency_gain": optimizations.estimated_improvement.efficiency_gain,
            "resource_savings": optimizations.estimated_improvement.resource_savings
        }
    });

    Ok(CallToolResult {
        content: vec![ToolContent::Text {
            text: serde_json::to_string_pretty(&result)?,
        }],
        is_error: Some(false),
    })
}
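
// Hypothetical call site (illustrative; the real dispatch wiring lives in the
// server's tool router): analyzing a session instead of an explicit history.
//
//     let args = serde_json::json!({ "session_id": "session-123" });
//     let report = optimize_workflow(&server, Some(&args)).await?;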

/// Workflow analysis data
#[derive(Debug, Clone)]
struct WorkflowAnalysis {
    performance_metrics: PerformanceMetrics,
    issues: Vec<WorkflowIssue>,
    optimization_potential: f64,
    bottlenecks: Vec<Bottleneck>,
    tool_usage_patterns: HashMap<String, ToolUsagePattern>,
}

/// Performance metrics
#[derive(Debug, Clone, serde::Serialize)]
struct PerformanceMetrics {
    total_execution_time_ms: u64,
    average_tool_time_ms: f64,
    success_rate: f64,
    parallelization_efficiency: f64,
    resource_utilization: f64,
    tool_count: usize,
}

/// Workflow issue
#[derive(Debug, Clone, serde::Serialize)]
struct WorkflowIssue {
    issue_type: String,
    severity: String,
    description: String,
    affected_tools: Vec<String>,
    impact: String,
}

/// Performance bottleneck
#[derive(Debug, Clone, serde::Serialize)]
struct Bottleneck {
    bottleneck_type: String,
    location: String,
    impact_ms: u64,
    recommendation: String,
}

/// Tool usage pattern
#[derive(Debug, Clone, serde::Serialize)]
struct ToolUsagePattern {
    frequency: u32,
    average_execution_time_ms: f64,
    success_rate: f64,
    common_parameters: HashMap<String, Value>,
    dependencies: Vec<String>,
}

/// Optimization recommendations
#[derive(Debug, Clone)]
struct OptimizationRecommendations {
    recommendations: Vec<OptimizationRecommendation>,
    optimized_sequence: Vec<OptimizedToolStep>,
    parallel_groups: Vec<Vec<String>>,
    estimated_improvement: ImprovementEstimate,
    execution_strategy: String,
    quick_wins: Vec<String>,
    advanced_optimizations: Vec<String>,
    migration_steps: Vec<String>,
    testing_recommendations: Vec<String>,
    projected_metrics: PerformanceMetrics,
}

/// Individual optimization recommendation
#[derive(Debug, Clone, serde::Serialize)]
struct OptimizationRecommendation {
    optimization_type: String,
    priority: String,
    description: String,
    implementation: String,
    expected_benefit: String,
    effort_level: String,
    tools_affected: Vec<String>,
}

/// Optimized tool step
#[derive(Debug, Clone, serde::Serialize)]
struct OptimizedToolStep {
    tool_name: String,
    execution_order: u32,
    parallel_group: Option<u32>,
    optimized_parameters: Value,
    rationale: String,
    expected_time_ms: u64,
}

/// Improvement estimate
#[derive(Debug, Clone, serde::Serialize)]
struct ImprovementEstimate {
    time_reduction: f64,
    efficiency_gain: f64,
    resource_savings: f64,
    confidence: f64,
}

/// Analyze workflow history from provided data
fn analyze_workflow_history(history: &[Value]) -> Result<WorkflowAnalysis> {
    let mut total_time = 0u64;
    let mut successful_tools = 0;
    let mut total_tools = 0;
    let mut tool_times = HashMap::new();
    let mut tool_success = HashMap::new();

    for entry in history {
        let tool_name = entry
            .get("tool_name")
            .and_then(|v| v.as_str())
            .unwrap_or("unknown");

        let execution_time = entry
            .get("execution_time_ms")
            .and_then(|v| v.as_u64())
            .unwrap_or(0);

        let success = entry
            .get("success")
            .and_then(|v| v.as_bool())
            .unwrap_or(false);

        total_time += execution_time;
        total_tools += 1;

        if success {
            successful_tools += 1;
        }

        // Track tool-specific metrics
        let times = tool_times
            .entry(tool_name.to_string())
            .or_insert_with(Vec::new);
        times.push(execution_time);

        let successes = tool_success
            .entry(tool_name.to_string())
            .or_insert_with(|| (0, 0));
        successes.1 += 1; // total count
        if success {
            successes.0 += 1; // success count
        }
    }

    let average_time = if total_tools > 0 {
        total_time as f64 / total_tools as f64
    } else {
        0.0
    };
    let success_rate = if total_tools > 0 {
        successful_tools as f64 / total_tools as f64
    } else {
        0.0
    };

    // Identify issues and bottlenecks
    let issues = identify_workflow_issues(&tool_times, &tool_success, success_rate);
    let bottlenecks = identify_bottlenecks(&tool_times);

    // Calculate optimization potential
    let optimization_potential = calculate_optimization_potential(&tool_times, success_rate);

    // Create tool usage patterns
    let mut tool_usage_patterns = HashMap::new();
    for (tool_name, times) in &tool_times {
        let avg_time = times.iter().sum::<u64>() as f64 / times.len() as f64;
        let (successes, total) = tool_success.get(tool_name).unwrap_or(&(0, 0));
        let tool_success_rate = if *total > 0 {
            *successes as f64 / *total as f64
        } else {
            0.0
        };

        tool_usage_patterns.insert(
            tool_name.clone(),
            ToolUsagePattern {
                frequency: times.len() as u32,
                average_execution_time_ms: avg_time,
                success_rate: tool_success_rate,
                common_parameters: HashMap::new(),
                dependencies: vec![],
            },
        );
    }

    Ok(WorkflowAnalysis {
        performance_metrics: PerformanceMetrics {
            total_execution_time_ms: total_time,
            average_tool_time_ms: average_time,
            success_rate,
            parallelization_efficiency: estimate_parallelization_efficiency(&tool_times),
            resource_utilization: 0.8, // Placeholder
            tool_count: total_tools,
        },
        issues,
        optimization_potential,
        bottlenecks,
        tool_usage_patterns,
    })
}

/// Analyze session workflow (placeholder implementation)
fn analyze_session_workflow(_session_id: SessionId) -> Result<WorkflowAnalysis> {
    // In real implementation, would fetch session data
    Ok(WorkflowAnalysis {
        performance_metrics: PerformanceMetrics {
            total_execution_time_ms: 30000,
            average_tool_time_ms: 5000.0,
            success_rate: 0.85,
            parallelization_efficiency: 0.6,
            resource_utilization: 0.7,
            tool_count: 6,
        },
        issues: vec![],
        optimization_potential: 0.7,
        bottlenecks: vec![],
        tool_usage_patterns: HashMap::new(),
    })
}

/// Identify workflow issues
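///
/// Flags three conditions: tools whose average execution time exceeds 15 seconds,
/// tools whose success rate falls below 80%, and workflows spanning more than two
/// distinct tools (surfaced as parallelization candidates).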
fn identify_workflow_issues(
    tool_times: &HashMap<String, Vec<u64>>,
    tool_success: &HashMap<String, (u32, u32)>,
    _overall_success_rate: f64,
) -> Vec<WorkflowIssue> {
    let mut issues = Vec::new();

    // Check for slow tools
    for (tool_name, times) in tool_times {
        let avg_time = times.iter().sum::<u64>() as f64 / times.len() as f64;
        if avg_time > 15000.0 {
            // > 15 seconds
            issues.push(WorkflowIssue {
                issue_type: "performance".to_string(),
                severity: "medium".to_string(),
                description: format!("{} has high average execution time", tool_name),
                affected_tools: vec![tool_name.clone()],
                impact: "Significantly increases total workflow time".to_string(),
            });
        }
    }

    // Check for tools with low success rates
    for (tool_name, (successes, total)) in tool_success {
        let success_rate = *successes as f64 / *total as f64;
        if success_rate < 0.8 {
            issues.push(WorkflowIssue {
                issue_type: "reliability".to_string(),
                severity: "high".to_string(),
                description: format!(
                    "{} has low success rate: {:.1}%",
                    tool_name,
                    success_rate * 100.0
                ),
                affected_tools: vec![tool_name.clone()],
                impact: "May cause workflow failures and require retries".to_string(),
            });
        }
    }

    // Check for sequential execution inefficiency
    if tool_times.len() > 2 {
        issues.push(WorkflowIssue {
            issue_type: "parallelization".to_string(),
            severity: "low".to_string(),
            description: "Multiple tools could potentially run in parallel".to_string(),
            affected_tools: tool_times.keys().cloned().collect(),
            impact: "Sequential execution may be unnecessarily slow".to_string(),
        });
    }

    issues
}

/// Identify performance bottlenecks
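///
/// Reports two kinds of bottlenecks: execution-time variance (a slowest run more
/// than twice the tool's average) and consistently slow tools (average above 10
/// seconds).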
fn identify_bottlenecks(tool_times: &HashMap<String, Vec<u64>>) -> Vec<Bottleneck> {
    let mut bottlenecks = Vec::new();

    for (tool_name, times) in tool_times {
        let max_time = times.iter().max().unwrap_or(&0);
        let avg_time = times.iter().sum::<u64>() as f64 / times.len() as f64;

        if *max_time > avg_time as u64 * 2 {
            bottlenecks.push(Bottleneck {
                bottleneck_type: "execution_variance".to_string(),
                location: tool_name.clone(),
                impact_ms: *max_time - avg_time as u64,
                recommendation: "Investigate inconsistent execution times".to_string(),
            });
        }

        if avg_time > 10000.0 {
            bottlenecks.push(Bottleneck {
                bottleneck_type: "slow_tool".to_string(),
                location: tool_name.clone(),
                impact_ms: avg_time as u64,
                recommendation: "Consider optimizing parameters or breaking into smaller operations".to_string(),
            });
        }
    }

    bottlenecks
}

/// Calculate optimization potential
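///
/// Heuristic score in [0.0, 1.0]: +0.3 when more than one tool is present
/// (parallelization headroom), plus half the gap to a 90% success rate, plus
/// +0.2 when the average tool time exceeds 5 seconds; the sum is capped at 1.0.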
fn calculate_optimization_potential(
    tool_times: &HashMap<String, Vec<u64>>,
    success_rate: f64,
) -> f64 {
    let mut potential = 0.0;

    // Parallelization potential
    if tool_times.len() > 1 {
        potential += 0.3; // 30% improvement from parallelization
    }

    // Success rate improvement potential
    if success_rate < 0.9 {
        potential += (0.9 - success_rate) * 0.5; // Potential from improving reliability
    }

    // Time optimization potential
    let total_time: u64 = tool_times.values().flatten().sum();
    let tool_count = tool_times.values().map(|times| times.len()).sum::<usize>();
    if tool_count > 0 {
        let avg_time = total_time as f64 / tool_count as f64;
        if avg_time > 5000.0 {
            potential += 0.2; // 20% improvement from time optimization
        }
    }

    potential.min(1.0)
}

/// Estimate parallelization efficiency
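///
/// Approximated as the slowest tool's average time divided by the sum of all
/// tools' average times, i.e. the ideal parallel wall time relative to fully
/// sequential execution. Values near 1.0 indicate little to gain from running
/// tools concurrently.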
fn estimate_parallelization_efficiency(tool_times: &HashMap<String, Vec<u64>>) -> f64 {
    if tool_times.len() <= 1 {
        return 1.0;
    }

    // Simple estimation based on tool count and average times
    let avg_times: Vec<f64> = tool_times
        .values()
        .map(|times| times.iter().sum::<u64>() as f64 / times.len() as f64)
        .collect();

    let max_time = avg_times.iter().fold(0.0f64, |acc, &x| acc.max(x));
    let total_time: f64 = avg_times.iter().sum();

    if max_time > 0.0 {
        max_time / total_time
    } else {
        1.0
    }
}

/// Generate optimization recommendations
fn generate_optimization_recommendations(
    analysis: &WorkflowAnalysis,
    goals: &[&str],
    _target_performance: Option<&Value>,
    _constraints: Option<&Value>,
) -> Result<OptimizationRecommendations> {
    let mut recommendations = Vec::new();
    let mut quick_wins = Vec::new();
    let mut advanced_optimizations = Vec::new();

    // Speed optimization recommendations
    if goals.contains(&"speed") {
        if analysis.performance_metrics.parallelization_efficiency < 0.8 {
            recommendations.push(OptimizationRecommendation {
                optimization_type: "parallelization".to_string(),
                priority: "high".to_string(),
                description: "Execute compatible tools in parallel to reduce total time".to_string(),
                implementation: "Group analysis tools (complexity, security, performance) for parallel execution".to_string(),
                expected_benefit: "30-50% reduction in total execution time".to_string(),
                effort_level: "medium".to_string(),
                tools_affected: analysis.tool_usage_patterns.keys().cloned().collect(),
            });
            quick_wins.push("Enable parallel execution for analysis tools".to_string());
        }

        // Tool-specific optimizations
        for (tool_name, pattern) in &analysis.tool_usage_patterns {
            if pattern.average_execution_time_ms > 10000.0 {
                recommendations.push(OptimizationRecommendation {
                    optimization_type: "parameter_optimization".to_string(),
                    priority: "medium".to_string(),
                    description: format!(
                        "Optimize {} parameters to reduce execution time",
                        tool_name
                    ),
                    implementation: "Reduce scope or adjust analysis depth".to_string(),
                    expected_benefit: "20-40% reduction in tool execution time".to_string(),
                    effort_level: "low".to_string(),
                    tools_affected: vec![tool_name.clone()],
                });
                quick_wins.push(format!(
                    "Optimize {} parameters for faster execution",
                    tool_name
                ));
            }
        }
    }

    // Resource optimization
    if goals.contains(&"resource_usage") {
        advanced_optimizations
            .push("Implement intelligent caching for expensive operations".to_string());
        advanced_optimizations.push("Use lazy loading for large result sets".to_string());
    }

    // Create optimized sequence
    let optimized_sequence = create_optimized_sequence(&analysis.tool_usage_patterns)?;
    let parallel_groups = identify_parallel_groups(&analysis.tool_usage_patterns);

    // Estimate improvements
    let time_reduction = if analysis.performance_metrics.parallelization_efficiency < 0.8 {
        0.4
    } else {
        0.2
    };
    let estimated_improvement = ImprovementEstimate {
        time_reduction,
        efficiency_gain: 0.25,
        resource_savings: 0.15,
        confidence: 0.8,
    };

    // Project optimized metrics
    let projected_metrics = PerformanceMetrics {
        total_execution_time_ms: (analysis.performance_metrics.total_execution_time_ms as f64
            * (1.0 - time_reduction)) as u64,
        average_tool_time_ms: analysis.performance_metrics.average_tool_time_ms * 0.8,
        success_rate: (analysis.performance_metrics.success_rate + 0.1).min(1.0),
        parallelization_efficiency: (analysis.performance_metrics.parallelization_efficiency + 0.3)
            .min(1.0),
        resource_utilization: analysis.performance_metrics.resource_utilization * 0.85,
        tool_count: analysis.performance_metrics.tool_count,
    };

    Ok(OptimizationRecommendations {
        recommendations,
        optimized_sequence,
        parallel_groups,
        estimated_improvement,
        execution_strategy: "optimized_parallel".to_string(),
        quick_wins,
        advanced_optimizations,
        migration_steps: vec![
            "1. Test parallel execution with non-critical tools".to_string(),
            "2. Gradually increase parallelization based on results".to_string(),
            "3. Implement caching for frequently used operations".to_string(),
            "4. Monitor performance metrics and adjust as needed".to_string(),
        ],
        testing_recommendations: vec![
            "A/B test optimized vs original workflow".to_string(),
            "Monitor success rates during optimization rollout".to_string(),
            "Validate result quality remains consistent".to_string(),
        ],
        projected_metrics,
    })
}

/// Create optimized tool sequence
fn create_optimized_sequence(
    tool_patterns: &HashMap<String, ToolUsagePattern>,
) -> Result<Vec<OptimizedToolStep>> {
    let mut sequence = Vec::new();
    let mut order = 1;

    // Repository stats should typically run first
    if tool_patterns.contains_key("repository_stats") {
        sequence.push(OptimizedToolStep {
            tool_name: "repository_stats".to_string(),
            execution_order: order,
            parallel_group: None,
            optimized_parameters: json!({}),
            rationale: "Provides context for subsequent analysis".to_string(),
            expected_time_ms: tool_patterns
                .get("repository_stats")
                .map(|p| p.average_execution_time_ms as u64)
                .unwrap_or(2000),
        });
        order += 1;
    }

    // Group analysis tools for parallel execution
    let analysis_tools = [
        "analyze_complexity",
        "analyze_security",
        "analyze_performance",
    ];
    let parallel_group_id = 1;

    for tool_name in &analysis_tools {
        if let Some(pattern) = tool_patterns.get(*tool_name) {
            sequence.push(OptimizedToolStep {
                tool_name: tool_name.to_string(),
                execution_order: order,
                parallel_group: Some(parallel_group_id),
                optimized_parameters: json!({}),
                rationale: "Can run in parallel with other analysis tools".to_string(),
                expected_time_ms: (pattern.average_execution_time_ms * 0.8) as u64,
            });
        }
    }

    if sequence
        .iter()
        .any(|s| s.parallel_group == Some(parallel_group_id))
    {
        order += 1;
        // parallel_group_id would be incremented here for future groups
    }

    // Add remaining tools
    for (tool_name, pattern) in tool_patterns {
        if !sequence.iter().any(|s| s.tool_name == *tool_name) {
            sequence.push(OptimizedToolStep {
                tool_name: tool_name.clone(),
                execution_order: order,
                parallel_group: None,
                optimized_parameters: json!({}),
                rationale: "Sequential execution based on dependencies".to_string(),
                expected_time_ms: pattern.average_execution_time_ms as u64,
            });
            order += 1;
        }
    }

    Ok(sequence)
}

/// Identify tools that can run in parallel
fn identify_parallel_groups(tool_patterns: &HashMap<String, ToolUsagePattern>) -> Vec<Vec<String>> {
    let mut groups = Vec::new();

    // Analysis tools group
    let analysis_tools: Vec<String> = [
        "analyze_complexity",
        "analyze_security",
        "analyze_performance",
    ]
    .iter()
    .filter(|tool| tool_patterns.contains_key(**tool))
    .map(|tool| tool.to_string())
    .collect();

    if analysis_tools.len() > 1 {
        groups.push(analysis_tools);
    }

    // Search tools group
    let search_tools: Vec<String> = ["search_symbols", "search_content", "find_files"]
        .iter()
        .filter(|tool| tool_patterns.contains_key(**tool))
        .map(|tool| tool.to_string())
        .collect();

    if search_tools.len() > 1 {
        groups.push(search_tools);
    }

    groups
}
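
// A minimal test sketch (illustrative, assuming the crate's usual `cargo test`
// setup): exercises `analyze_workflow_history` with a tiny synthetic history
// and checks the aggregate metrics it derives. The history values are made up.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn analyze_workflow_history_computes_basic_metrics() {
        let history = vec![
            serde_json::json!({
                "tool_name": "analyze_complexity",
                "execution_time_ms": 12_000,
                "success": true
            }),
            serde_json::json!({
                "tool_name": "analyze_security",
                "execution_time_ms": 18_000,
                "success": false
            }),
        ];

        let analysis = analyze_workflow_history(&history).expect("analysis should succeed");

        assert_eq!(analysis.performance_metrics.tool_count, 2);
        assert_eq!(analysis.performance_metrics.total_execution_time_ms, 30_000);
        assert!((analysis.performance_metrics.success_rate - 0.5).abs() < f64::EPSILON);
    }
}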