codeprism_analysis/
performance.rs

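//! Performance analysis module: regex-based heuristics that flag likely
//! performance problems (nested loops, I/O in loops, etc.) in source text.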
2
3use anyhow::Result;
4use regex::Regex;
5use serde_json::Value;
6use std::collections::HashMap;
7
/// A single performance finding produced by [`PerformanceAnalyzer`].
///
/// All fields are plain text so the value can be compared, cloned and
/// serialized by callers without further conversion.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PerformanceIssue {
    /// Name of the pattern that matched (e.g. `"Nested Loop"`).
    pub issue_type: String,
    /// Severity label attached to the matching pattern.
    pub severity: String,
    /// Human-readable explanation of the finding.
    pub description: String,
    /// Where the finding was detected, if known.
    pub location: Option<String>,
    /// Suggested remediation.
    pub recommendation: String,
    /// Complexity label of the matching pattern (e.g. `"O(n²)"`), if any.
    pub complexity_estimate: Option<String>,
}
18
19/// Performance analyzer for code analysis
20pub struct PerformanceAnalyzer {
21    patterns: HashMap<String, Vec<PerformancePattern>>,
22}
23
/// A single regex-based heuristic plus the metadata that is copied into a
/// [`PerformanceIssue`] when the regex matches.
#[derive(Debug, Clone)]
struct PerformancePattern {
    /// Pattern name; reported as the issue type.
    name: String,
    /// Compiled regular expression searched for in the analyzed content.
    pattern: Regex,
    /// Severity label reported with the issue.
    severity: String,
    /// Explanation reported with the issue.
    description: String,
    /// Remediation advice reported with the issue.
    recommendation: String,
    /// Complexity label (e.g. `"O(n²)"`); reported with the issue and also
    /// compared against the caller's complexity threshold.
    complexity: String,
}
33
34impl PerformanceAnalyzer {
35    pub fn new() -> Self {
36        let mut analyzer = Self {
37            patterns: HashMap::new(),
38        };
39        analyzer.initialize_patterns();
40        analyzer
41    }
42
43    fn initialize_patterns(&mut self) {
44        // Time complexity patterns
45        let time_patterns = vec![
46            PerformancePattern {
47                name: "Nested Loop".to_string(),
48                pattern: Regex::new(r"(?s)for\s+.*?for\s+.*?for\s+").unwrap(),
49                severity: "high".to_string(),
50                description: "Triple nested loop detected - O(n³) complexity".to_string(),
51                recommendation: "Consider algorithmic optimization or data structure changes"
52                    .to_string(),
53                complexity: "O(n³)".to_string(),
54            },
55            PerformancePattern {
56                name: "Double Nested Loop".to_string(),
57                pattern: Regex::new(r"(?s)for\s+.*?for\s+").unwrap(),
58                severity: "medium".to_string(),
59                description: "Double nested loop detected - O(n²) complexity".to_string(),
60                recommendation: "Consider if this can be optimized to O(n log n) or O(n)"
61                    .to_string(),
62                complexity: "O(n²)".to_string(),
63            },
64            PerformancePattern {
65                name: "Inefficient String Concatenation".to_string(),
66                pattern: Regex::new(r"(?i)(str|string)\s*\+=\s*").unwrap(),
67                severity: "medium".to_string(),
68                description: "String concatenation in loop can be inefficient".to_string(),
69                recommendation: "Use StringBuilder, list.join(), or similar efficient methods"
70                    .to_string(),
71                complexity: "O(n²)".to_string(),
72            },
73        ];
74        self.patterns
75            .insert("time_complexity".to_string(), time_patterns);
76
77        // Memory usage patterns
78        let memory_patterns = vec![
79            PerformancePattern {
80                name: "Large Object Creation in Loop".to_string(),
81                pattern: Regex::new(r"(?s)for\s+.*?new\s+\w+\s*\(").unwrap(),
82                severity: "medium".to_string(),
83                description: "Object creation inside loop may cause memory pressure".to_string(),
84                recommendation: "Consider object pooling or moving creation outside loop"
85                    .to_string(),
86                complexity: "O(n)".to_string(),
87            },
88            PerformancePattern {
89                name: "Potential Memory Leak".to_string(),
90                pattern: Regex::new(r"(?i)(global|static)\s+\w+\s*=\s*\[\]").unwrap(),
91                severity: "high".to_string(),
92                description: "Global/static collection may grow indefinitely".to_string(),
93                recommendation: "Implement proper cleanup or use bounded collections".to_string(),
94                complexity: "O(n)".to_string(),
95            },
96        ];
97        self.patterns
98            .insert("memory_usage".to_string(), memory_patterns);
99
100        // Hot spots patterns
101        let hotspot_patterns = vec![
102            PerformancePattern {
103                name: "Database Query in Loop".to_string(),
104                pattern: Regex::new(
105                    r"(?s)for\s+.*?(query|execute|select|insert|update|delete)\s*\(",
106                )
107                .unwrap(),
108                severity: "critical".to_string(),
109                description: "Database query inside loop - N+1 query problem".to_string(),
110                recommendation: "Use batch operations or optimize with joins".to_string(),
111                complexity: "O(n)".to_string(),
112            },
113            PerformancePattern {
114                name: "File I/O in Loop".to_string(),
115                pattern: Regex::new(r"(?s)for\s+.*?(open|read|write|close)\s*\(").unwrap(),
116                severity: "high".to_string(),
117                description: "File I/O operations inside loop".to_string(),
118                recommendation: "Batch file operations or use streaming approaches".to_string(),
119                complexity: "O(n)".to_string(),
120            },
121            PerformancePattern {
122                name: "Network Call in Loop".to_string(),
123                pattern: Regex::new(r"(?s)for\s+.*?(http|fetch|request|get|post)\s*\(").unwrap(),
124                severity: "critical".to_string(),
125                description: "Network calls inside loop".to_string(),
126                recommendation: "Use batch APIs or parallel processing".to_string(),
127                complexity: "O(n)".to_string(),
128            },
129        ];
130        self.patterns
131            .insert("hot_spots".to_string(), hotspot_patterns);
132
133        // Anti-patterns
134        let antipattern_patterns = vec![
135            PerformancePattern {
136                name: "Premature Optimization".to_string(),
137                pattern: Regex::new(r"(?i)(micro.?optimization|premature.?optimization)").unwrap(),
138                severity: "low".to_string(),
139                description: "Potential premature optimization detected".to_string(),
140                recommendation: "Profile first, then optimize based on actual bottlenecks"
141                    .to_string(),
142                complexity: "Variable".to_string(),
143            },
144            PerformancePattern {
145                name: "Inefficient Collection Usage".to_string(),
146                pattern: Regex::new(r"(?i)(list|array)\.contains\s*\(.*?\)").unwrap(),
147                severity: "medium".to_string(),
148                description: "Linear search in collection".to_string(),
149                recommendation: "Consider using Set or HashMap for O(1) lookups".to_string(),
150                complexity: "O(n)".to_string(),
151            },
152        ];
153        self.patterns
154            .insert("anti_patterns".to_string(), antipattern_patterns);
155
156        // Scalability patterns
157        let scalability_patterns = vec![
158            PerformancePattern {
159                name: "Synchronous Processing".to_string(),
160                pattern: Regex::new(r"(?i)(synchronous|blocking|wait|sleep)\s*\(").unwrap(),
161                severity: "medium".to_string(),
162                description: "Synchronous operations may not scale well".to_string(),
163                recommendation: "Consider asynchronous processing for better scalability"
164                    .to_string(),
165                complexity: "O(1)".to_string(),
166            },
167            PerformancePattern {
168                name: "Single-threaded Processing".to_string(),
169                pattern: Regex::new(r"(?s)for\s+.*?(process|compute|calculate)\s*\([^)]*\)")
170                    .unwrap(),
171                severity: "low".to_string(),
172                description: "Sequential processing of independent tasks".to_string(),
173                recommendation: "Consider parallel processing for CPU-intensive tasks".to_string(),
174                complexity: "O(n)".to_string(),
175            },
176        ];
177        self.patterns
178            .insert("scalability".to_string(), scalability_patterns);
179    }
180
181    /// Analyze content for performance issues
182    pub fn analyze_content(
183        &self,
184        content: &str,
185        analysis_types: &[String],
186        complexity_threshold: &str,
187    ) -> Result<Vec<PerformanceIssue>> {
188        let mut issues = Vec::new();
189
190        let target_types = if analysis_types.contains(&"all".to_string()) {
191            self.patterns.keys().cloned().collect::<Vec<_>>()
192        } else {
193            analysis_types.to_vec()
194        };
195
196        for analysis_type in target_types {
197            if let Some(patterns) = self.patterns.get(&analysis_type) {
198                for pattern in patterns {
199                    if self.meets_complexity_threshold(&pattern.complexity, complexity_threshold) {
200                        if let Some(captures) = pattern.pattern.find(content) {
201                            issues.push(PerformanceIssue {
202                                issue_type: pattern.name.clone(),
203                                severity: pattern.severity.clone(),
204                                description: pattern.description.clone(),
205                                location: Some(format!("Position: {}", captures.start())),
206                                recommendation: pattern.recommendation.clone(),
207                                complexity_estimate: Some(pattern.complexity.clone()),
208                            });
209                        }
210                    }
211                }
212            }
213        }
214
215        Ok(issues)
216    }
217
218    /// Check if complexity meets threshold
219    fn meets_complexity_threshold(&self, complexity: &str, threshold: &str) -> bool {
220        // Simple complexity comparison - in practice, this would be more sophisticated
221        match threshold {
222            "low" => true, // Include all complexities
223            "medium" => !complexity.contains("O(1)"),
224            "high" => {
225                complexity.contains("O(n²)")
226                    || complexity.contains("O(n³)")
227                    || complexity.contains("O(2^n)")
228            }
229            _ => true,
230        }
231    }
232
233    /// Get performance recommendations
234    pub fn get_performance_recommendations(&self, issues: &[PerformanceIssue]) -> Vec<String> {
235        let mut recommendations = Vec::new();
236
237        if issues.is_empty() {
238            recommendations.push(
239                "No obvious performance issues detected. Continue monitoring with profiling tools."
240                    .to_string(),
241            );
242            return recommendations;
243        }
244
245        // Group by issue type
246        let mut issue_counts = HashMap::new();
247        for issue in issues {
248            *issue_counts.entry(issue.issue_type.clone()).or_insert(0) += 1;
249        }
250
251        // General recommendations based on found issues
252        if issue_counts.contains_key("Database Query in Loop") {
253            recommendations
254                .push("Optimize database access patterns to avoid N+1 query problems.".to_string());
255        }
256
257        if issue_counts.contains_key("Nested Loop")
258            || issue_counts.contains_key("Double Nested Loop")
259        {
260            recommendations.push(
261                "Review algorithmic complexity and consider more efficient algorithms.".to_string(),
262            );
263        }
264
265        if issue_counts.contains_key("Network Call in Loop") {
266            recommendations.push(
267                "Implement batch processing or async patterns for network operations.".to_string(),
268            );
269        }
270
271        if issue_counts.contains_key("Large Object Creation in Loop") {
272            recommendations.push(
273                "Consider object pooling or factory patterns to reduce allocation overhead."
274                    .to_string(),
275            );
276        }
277
278        recommendations
279            .push("Use profiling tools to identify actual bottlenecks in production.".to_string());
280        recommendations.push("Implement performance monitoring and alerting.".to_string());
281        recommendations
282            .push("Consider caching strategies for frequently accessed data.".to_string());
283
284        recommendations
285    }
286
287    /// Analyze time complexity issues
288    pub fn analyze_time_complexity(&self, content: &str) -> Result<Vec<Value>> {
289        let issues = self.analyze_content(content, &["time_complexity".to_string()], "low")?;
290
291        Ok(issues
292            .into_iter()
293            .map(|i| {
294                serde_json::json!({
295                    "type": i.issue_type,
296                    "severity": i.severity,
297                    "description": i.description,
298                    "location": i.location,
299                    "recommendation": i.recommendation,
300                    "complexity": i.complexity_estimate
301                })
302            })
303            .collect())
304    }
305
306    /// Analyze memory usage issues
307    pub fn analyze_memory_usage(&self, content: &str) -> Result<Vec<Value>> {
308        let issues = self.analyze_content(content, &["memory_usage".to_string()], "low")?;
309
310        Ok(issues
311            .into_iter()
312            .map(|i| {
313                serde_json::json!({
314                    "type": i.issue_type,
315                    "severity": i.severity,
316                    "description": i.description,
317                    "location": i.location,
318                    "recommendation": i.recommendation,
319                    "complexity": i.complexity_estimate
320                })
321            })
322            .collect())
323    }
324
325    /// Detect performance hot spots
326    pub fn detect_performance_hot_spots(&self, content: &str) -> Result<Vec<Value>> {
327        let issues = self.analyze_content(content, &["hot_spots".to_string()], "low")?;
328
329        Ok(issues
330            .into_iter()
331            .map(|i| {
332                serde_json::json!({
333                    "type": i.issue_type,
334                    "severity": i.severity,
335                    "description": i.description,
336                    "location": i.location,
337                    "recommendation": i.recommendation,
338                    "complexity": i.complexity_estimate
339                })
340            })
341            .collect())
342    }
343}
344
345impl Default for PerformanceAnalyzer {
346    fn default() -> Self {
347        Self::new()
348    }
349}
350
#[cfg(test)]
mod tests {
    use super::*;

    /// Three `for` keywords in sequence must trigger the triple-nesting pattern.
    #[test]
    fn test_nested_loop_detection() {
        let analyzer = PerformanceAnalyzer::new();

        let code = "for i in range(n): for j in range(n): for k in range(n): pass";
        let issues = analyzer
            .analyze_content(code, &["time_complexity".to_string()], "low")
            .unwrap();

        assert!(!issues.is_empty());
        assert!(issues.iter().any(|i| i.issue_type == "Nested Loop"));
    }

    /// A `query(` call after a `for` must be reported, with the match position.
    #[test]
    fn test_database_query_in_loop() {
        let analyzer = PerformanceAnalyzer::new();

        let code =
            "for user in users:\n    query(\"SELECT * FROM orders WHERE user_id = ?\", user.id)";
        let issues = analyzer
            .analyze_content(code, &["hot_spots".to_string()], "low")
            .unwrap();

        assert!(!issues.is_empty());
        let db_issue = issues
            .iter()
            .find(|i| i.issue_type == "Database Query in Loop")
            .expect("database query pattern should match");
        // The match starts at the leading `for`, i.e. byte offset 0.
        assert_eq!(db_issue.location.as_deref(), Some("Position: 0"));
        assert_eq!(db_issue.severity, "critical");
    }

    /// `str +=` must be flagged as inefficient concatenation.
    #[test]
    fn test_string_concatenation() {
        let analyzer = PerformanceAnalyzer::new();

        let code = "result = \"\"; str += item";
        let issues = analyzer
            .analyze_content(code, &["time_complexity".to_string()], "low")
            .unwrap();

        assert!(!issues.is_empty());
        assert!(issues
            .iter()
            .any(|i| i.issue_type == "Inefficient String Concatenation"));
    }

    /// Unknown category names are ignored rather than treated as errors.
    #[test]
    fn test_unknown_analysis_type_yields_no_issues() {
        let analyzer = PerformanceAnalyzer::new();

        let issues = analyzer
            .analyze_content("for a in b: for c in d: pass", &["nope".to_string()], "low")
            .unwrap();

        assert!(issues.is_empty());
    }

    /// Covers each threshold branch, including the catch-all.
    #[test]
    fn test_complexity_threshold() {
        let analyzer = PerformanceAnalyzer::new();

        assert!(analyzer.meets_complexity_threshold("O(1)", "low"));
        assert!(analyzer.meets_complexity_threshold("O(n²)", "medium"));
        assert!(!analyzer.meets_complexity_threshold("O(1)", "medium"));
        assert!(analyzer.meets_complexity_threshold("O(n³)", "high"));
        assert!(!analyzer.meets_complexity_threshold("O(n)", "high"));
        assert!(analyzer.meets_complexity_threshold("O(n)", "unrecognized"));
    }

    /// A database issue must yield the targeted N+1 advice plus general advice.
    #[test]
    fn test_performance_recommendations() {
        let analyzer = PerformanceAnalyzer::new();

        let issues = vec![PerformanceIssue {
            issue_type: "Database Query in Loop".to_string(),
            severity: "critical".to_string(),
            description: "Test".to_string(),
            location: None,
            recommendation: "Test".to_string(),
            complexity_estimate: Some("O(n)".to_string()),
        }];

        let recommendations = analyzer.get_performance_recommendations(&issues);
        assert!(!recommendations.is_empty());
        assert!(recommendations.iter().any(|r| r.contains("N+1")));
        // One targeted recommendation plus the three general ones.
        assert_eq!(recommendations.len(), 4);
    }

    /// With no issues, only the single fallback message is returned.
    #[test]
    fn test_recommendations_without_issues() {
        let analyzer = PerformanceAnalyzer::new();

        let recommendations = analyzer.get_performance_recommendations(&[]);
        assert_eq!(recommendations.len(), 1);
        assert!(recommendations[0].starts_with("No obvious performance issues detected"));
    }
}