// vtcode_core/tools/tree_sitter/analysis.rs

//! Code analysis capabilities using tree-sitter

use crate::tools::tree_sitter::analyzer::{LanguageSupport, Position, SyntaxNode, SyntaxTree};
use crate::tools::tree_sitter::languages::{LanguageAnalyzer, SymbolInfo, SymbolKind};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

8/// Comprehensive code analysis result
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct CodeAnalysis {
11    pub file_path: String,
12    pub language: LanguageSupport,
13    pub metrics: CodeMetrics,
14    pub symbols: Vec<SymbolInfo>,
15    pub dependencies: Vec<DependencyInfo>,
16    pub issues: Vec<AnalysisIssue>,
17    pub complexity: ComplexityMetrics,
18    pub structure: CodeStructure,
19}
20
21/// Code metrics
22#[derive(Debug, Clone, Serialize, Deserialize, Default)]
23pub struct CodeMetrics {
24    pub lines_of_code: usize,
25    pub lines_of_comments: usize,
26    pub blank_lines: usize,
27    pub functions_count: usize,
28    pub classes_count: usize,
29    pub variables_count: usize,
30    pub imports_count: usize,
31    pub comment_ratio: f64,
32}
33
34/// Dependency information
35#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct DependencyInfo {
37    pub name: String,
38    pub kind: DependencyKind,
39    pub source: String,
40    pub position: Position,
41}
42
43#[derive(Debug, Clone, Serialize, Deserialize)]
44pub enum DependencyKind {
45    Import,
46    Package,
47    Module,
48    External,
49}
50
51/// Analysis issues and suggestions
52#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct AnalysisIssue {
54    pub level: IssueLevel,
55    pub category: IssueCategory,
56    pub message: String,
57    pub position: Position,
58    pub suggestion: Option<String>,
59}
60
61#[derive(Debug, Clone, Serialize, Deserialize)]
62pub enum IssueLevel {
63    Info,
64    Warning,
65    Error,
66}
67
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub enum IssueCategory {
70    Style,
71    Performance,
72    Security,
73    Complexity,
74    Maintainability,
75}
76
77/// Code complexity metrics
78#[derive(Debug, Clone, Serialize, Deserialize, Default)]
79pub struct ComplexityMetrics {
80    pub cyclomatic_complexity: usize,
81    pub cognitive_complexity: usize,
82    pub nesting_depth: usize,
83    pub function_length_average: f64,
84    pub function_length_max: usize,
85    pub parameters_average: f64,
86    pub parameters_max: usize,
87}
88
89/// Code structure information
90#[derive(Debug, Clone, Serialize, Deserialize, Default)]
91pub struct CodeStructure {
92    pub modules: Vec<String>,
93    pub functions: Vec<String>,
94    pub classes: Vec<String>,
95    pub hierarchy: HashMap<String, Vec<String>>, // parent -> children
96}
97
98/// Code analyzer using tree-sitter
99pub struct CodeAnalyzer {
100    language_analyzer: LanguageAnalyzer,
101}
102
103impl CodeAnalyzer {
104    pub fn new(language: &LanguageSupport) -> Self {
105        Self {
106            language_analyzer: LanguageAnalyzer::new(language),
107        }
108    }
109
110    /// Perform comprehensive code analysis
111    pub fn analyze(&self, syntax_tree: &SyntaxTree, file_path: &str) -> CodeAnalysis {
112        let symbols = self.language_analyzer.extract_symbols(syntax_tree);
113        let metrics = self.calculate_metrics(syntax_tree, &symbols);
114        let dependencies = self.extract_dependencies(syntax_tree);
115        let issues = self.analyze_issues(syntax_tree, &symbols);
116        let complexity = self.calculate_complexity(syntax_tree, &symbols);
117        let structure = self.analyze_structure(&symbols);
118
119        CodeAnalysis {
120            file_path: file_path.to_string(),
121            language: syntax_tree.language,
122            metrics,
123            symbols,
124            dependencies,
125            issues,
126            complexity,
127            structure,
128        }
129    }
130
131    /// Calculate basic code metrics
132    fn calculate_metrics(&self, tree: &SyntaxTree, symbols: &[SymbolInfo]) -> CodeMetrics {
133        let lines = tree.source_code.lines().collect::<Vec<_>>();
134        let total_lines = lines.len();
135
136        let mut comment_lines = 0;
137        let mut blank_lines = 0;
138
139        for line in &lines {
140            let trimmed = line.trim();
141            if trimmed.is_empty() {
142                blank_lines += 1;
143            } else if trimmed.starts_with("//")
144                || trimmed.starts_with("/*")
145                || trimmed.starts_with("#")
146                || trimmed.starts_with("'''")
147            {
148                comment_lines += 1;
149            }
150        }
151
152        let code_lines = total_lines - comment_lines - blank_lines;
153        let comment_ratio = if code_lines > 0 {
154            comment_lines as f64 / code_lines as f64
155        } else {
156            0.0
157        };
158
159        let functions_count = symbols
160            .iter()
161            .filter(|s| matches!(s.kind, SymbolKind::Function | SymbolKind::Method))
162            .count();
163        let classes_count = symbols
164            .iter()
165            .filter(|s| {
166                matches!(
167                    s.kind,
168                    SymbolKind::Class | SymbolKind::Struct | SymbolKind::Interface
169                )
170            })
171            .count();
172        let variables_count = symbols
173            .iter()
174            .filter(|s| matches!(s.kind, SymbolKind::Variable))
175            .count();
176        let imports_count = symbols
177            .iter()
178            .filter(|s| matches!(s.kind, SymbolKind::Import))
179            .count();
180
181        CodeMetrics {
182            lines_of_code: code_lines,
183            lines_of_comments: comment_lines,
184            blank_lines,
185            functions_count,
186            classes_count,
187            variables_count,
188            imports_count,
189            comment_ratio,
190        }
191    }
192
193    /// Extract dependencies from the code
194    fn extract_dependencies(&self, tree: &SyntaxTree) -> Vec<DependencyInfo> {
195        let mut dependencies = Vec::new();
196
197        // Extract imports based on language
198        match tree.language {
199            LanguageSupport::Rust => {
200                self.extract_rust_dependencies(&tree.root, &mut dependencies);
201            }
202            LanguageSupport::Python => {
203                self.extract_python_dependencies(&tree.root, &mut dependencies);
204            }
205            LanguageSupport::JavaScript | LanguageSupport::TypeScript => {
206                self.extract_js_dependencies(&tree.root, &mut dependencies);
207            }
208            LanguageSupport::Go => {
209                self.extract_go_dependencies(&tree.root, &mut dependencies);
210            }
211            LanguageSupport::Java => {
212                self.extract_java_dependencies(&tree.root, &mut dependencies);
213            }
214            LanguageSupport::Swift => {
215                self.extract_swift_dependencies(&tree.root, &mut dependencies);
216            }
217        }
218
219        dependencies
220    }
221
222    fn extract_rust_dependencies(&self, node: &SyntaxNode, deps: &mut Vec<DependencyInfo>) {
223        if node.kind == "use_declaration"
224            && let Some(path_node) = node
225                .named_children
226                .get("argument")
227                .and_then(|children| children.first())
228        {
229            deps.push(DependencyInfo {
230                name: path_node.text.clone(),
231                kind: DependencyKind::Import,
232                source: "use".to_string(),
233                position: path_node.start_position.clone(),
234            });
235        }
236
237        for child in &node.children {
238            self.extract_rust_dependencies(child, deps);
239        }
240    }
241
242    fn extract_python_dependencies(&self, node: &SyntaxNode, deps: &mut Vec<DependencyInfo>) {
243        if node.kind == "import_statement" || node.kind == "import_from_statement" {
244            for child in &node.children {
245                if child.kind == "dotted_name" {
246                    deps.push(DependencyInfo {
247                        name: child.text.clone(),
248                        kind: DependencyKind::Import,
249                        source: "import".to_string(),
250                        position: child.start_position.clone(),
251                    });
252                }
253            }
254        }
255
256        for child in &node.children {
257            self.extract_python_dependencies(child, deps);
258        }
259    }
260
261    fn extract_js_dependencies(&self, node: &SyntaxNode, deps: &mut Vec<DependencyInfo>) {
262        if node.kind == "import_statement" {
263            for child in &node.children {
264                if child.kind == "string" {
265                    deps.push(DependencyInfo {
266                        name: child.text.clone(),
267                        kind: DependencyKind::Import,
268                        source: "import".to_string(),
269                        position: child.start_position.clone(),
270                    });
271                }
272            }
273        }
274
275        for child in &node.children {
276            self.extract_js_dependencies(child, deps);
277        }
278    }
279
280    fn extract_go_dependencies(&self, node: &SyntaxNode, deps: &mut Vec<DependencyInfo>) {
281        if node.kind == "import_declaration" {
282            for child in &node.children {
283                if let Some(spec_node) = child.named_children.get("spec") {
284                    if let Some(path_node) =
285                        spec_node.first().and_then(|n| n.named_children.get("path"))
286                    {
287                        if let Some(string_node) = path_node.first() {
288                            deps.push(DependencyInfo {
289                                name: string_node.text.clone(),
290                                kind: DependencyKind::Import,
291                                source: "import".to_string(),
292                                position: string_node.start_position.clone(),
293                            });
294                        }
295                    }
296                }
297            }
298        }
299
300        for child in &node.children {
301            self.extract_go_dependencies(child, deps);
302        }
303    }
304
305    fn extract_java_dependencies(&self, node: &SyntaxNode, deps: &mut Vec<DependencyInfo>) {
306        if node.kind == "import_declaration" {
307            for child in &node.children {
308                if let Some(name_node) = child.named_children.get("qualified_name") {
309                    if let Some(name) = name_node.first() {
310                        deps.push(DependencyInfo {
311                            name: name.text.clone(),
312                            kind: DependencyKind::Import,
313                            source: "import".to_string(),
314                            position: name.start_position.clone(),
315                        });
316                    }
317                }
318            }
319        }
320
321        for child in &node.children {
322            self.extract_java_dependencies(child, deps);
323        }
324    }
325
326    #[allow(dead_code)]
327    fn extract_swift_dependencies(&self, node: &SyntaxNode, deps: &mut Vec<DependencyInfo>) {
328        if node.kind == "import_declaration" {
329            for child in &node.children {
330                if let Some(path_node) = child.named_children.get("path") {
331                    if let Some(path) = path_node.first() {
332                        deps.push(DependencyInfo {
333                            name: path.text.clone(),
334                            kind: DependencyKind::Import,
335                            source: "import".to_string(),
336                            position: path.start_position.clone(),
337                        });
338                    }
339                }
340            }
341        }
342
343        for child in &node.children {
344            self.extract_swift_dependencies(child, deps);
345        }
346    }
347
348    /// Analyze code for potential issues
349    fn analyze_issues(&self, tree: &SyntaxTree, symbols: &[SymbolInfo]) -> Vec<AnalysisIssue> {
350        let mut issues = Vec::new();
351
352        // Check for long functions
353        for symbol in symbols {
354            if matches!(symbol.kind, SymbolKind::Function | SymbolKind::Method) {
355                if let Some(signature) = &symbol.signature {
356                    if signature.len() > 100 {
357                        issues.push(AnalysisIssue {
358                            level: IssueLevel::Info,
359                            category: IssueCategory::Maintainability,
360                            message: format!("Long function signature: {}", symbol.name),
361                            position: symbol.position.clone(),
362                            suggestion: Some(
363                                "Consider breaking down into smaller functions".to_string(),
364                            ),
365                        });
366                    }
367                }
368            }
369        }
370
371        // Check for high cyclomatic complexity (simplified)
372        let complexity = self.calculate_complexity(tree, symbols);
373        if complexity.cyclomatic_complexity > 10 {
374            issues.push(AnalysisIssue {
375                level: IssueLevel::Warning,
376                category: IssueCategory::Complexity,
377                message: format!(
378                    "High cyclomatic complexity: {}",
379                    complexity.cyclomatic_complexity
380                ),
381                position: Position {
382                    row: 0,
383                    column: 0,
384                    byte_offset: 0,
385                },
386                suggestion: Some("Consider refactoring to reduce complexity".to_string()),
387            });
388        }
389
390        // Check for missing documentation
391        for symbol in symbols {
392            if matches!(symbol.kind, SymbolKind::Function | SymbolKind::Class)
393                && symbol.documentation.is_none()
394            {
395                issues.push(AnalysisIssue {
396                    level: IssueLevel::Info,
397                    category: IssueCategory::Maintainability,
398                    message: format!("Missing documentation for: {}", symbol.name),
399                    position: symbol.position.clone(),
400                    suggestion: Some("Add documentation comments".to_string()),
401                });
402            }
403        }
404
405        issues
406    }
407
408    /// Calculate code complexity metrics
409    fn calculate_complexity(&self, tree: &SyntaxTree, symbols: &[SymbolInfo]) -> ComplexityMetrics {
410        let mut cyclomatic_complexity = 1; // Base complexity
411        let mut cognitive_complexity = 0;
412        let mut max_nesting_depth = 0;
413
414        // Calculate complexity based on language-specific constructs
415        self.calculate_language_complexity(
416            &tree.root,
417            &mut cyclomatic_complexity,
418            &mut cognitive_complexity,
419            0,
420            &mut max_nesting_depth,
421        );
422
423        let function_lengths: Vec<usize> = symbols
424            .iter()
425            .filter(|s| matches!(s.kind, SymbolKind::Function | SymbolKind::Method))
426            .filter_map(|s| s.signature.as_ref().map(|sig| sig.lines().count()))
427            .collect();
428
429        let function_length_average = if !function_lengths.is_empty() {
430            function_lengths.iter().sum::<usize>() as f64 / function_lengths.len() as f64
431        } else {
432            0.0
433        };
434
435        let function_length_max = function_lengths.iter().cloned().max().unwrap_or(0);
436
437        // Calculate parameter statistics
438        let parameter_counts: Vec<usize> = symbols
439            .iter()
440            .filter(|s| matches!(s.kind, SymbolKind::Function | SymbolKind::Method))
441            .filter_map(|s| s.signature.as_ref())
442            .filter_map(|sig| {
443                let start = sig.find('(')?;
444                let end = sig.find(')')?;
445                let params = &sig[start + 1..end];
446                Some(params.split(',').filter(|p| !p.trim().is_empty()).count())
447            })
448            .collect();
449
450        let parameters_average = if !parameter_counts.is_empty() {
451            parameter_counts.iter().sum::<usize>() as f64 / parameter_counts.len() as f64
452        } else {
453            0.0
454        };
455
456        let parameters_max = parameter_counts.iter().cloned().max().unwrap_or(0);
457
458        ComplexityMetrics {
459            cyclomatic_complexity,
460            cognitive_complexity,
461            nesting_depth: max_nesting_depth,
462            function_length_average,
463            function_length_max,
464            parameters_average,
465            parameters_max,
466        }
467    }
468
469    fn calculate_language_complexity(
470        &self,
471        node: &SyntaxNode,
472        cc: &mut usize,
473        cognitive: &mut usize,
474        depth: usize,
475        max_depth: &mut usize,
476    ) {
477        *max_depth = (*max_depth).max(depth);
478
479        // Language-specific complexity calculations
480        match node.kind.as_str() {
481            // Control flow increases cyclomatic complexity
482            k if k.contains("if") || k.contains("else") => {
483                *cc += 1;
484                *cognitive += 1;
485            }
486            k if k.contains("for") || k.contains("while") || k.contains("loop") => {
487                *cc += 1;
488                *cognitive += 2;
489            }
490            k if k.contains("switch") || k.contains("match") => {
491                *cc += node
492                    .named_children
493                    .get("body")
494                    .and_then(|children| Some(children.len().saturating_sub(1)))
495                    .unwrap_or(0);
496                *cognitive += 1;
497            }
498            k if k.contains("try") || k.contains("catch") => {
499                *cc += 1;
500                *cognitive += 1;
501            }
502            k if k.contains("function") || k.contains("method") => {
503                *cognitive += 1; // Function definition
504            }
505            _ => {}
506        }
507
508        // Recursively calculate for children
509        for child in &node.children {
510            self.calculate_language_complexity(child, cc, cognitive, depth + 1, max_depth);
511        }
512    }
513
514    /// Analyze code structure
515    fn analyze_structure(&self, symbols: &[SymbolInfo]) -> CodeStructure {
516        let mut modules = Vec::new();
517        let mut functions = Vec::new();
518        let mut classes = Vec::new();
519        let mut hierarchy = HashMap::new();
520
521        for symbol in symbols {
522            match &symbol.kind {
523                SymbolKind::Module => modules.push(symbol.name.clone()),
524                SymbolKind::Function => functions.push(symbol.name.clone()),
525                SymbolKind::Class | SymbolKind::Struct | SymbolKind::Interface => {
526                    classes.push(symbol.name.clone());
527                }
528                _ => {}
529            }
530
531            // Build hierarchy (simplified - in practice, this would be more sophisticated)
532            if let Some(scope) = &symbol.scope {
533                hierarchy
534                    .entry(scope.clone())
535                    .or_insert_with(Vec::new)
536                    .push(symbol.name.clone());
537            }
538        }
539
540        CodeStructure {
541            modules,
542            functions,
543            classes,
544            hierarchy,
545        }
546    }
547}
548
/// Utility functions for code analysis.
///
/// A namespace for associated functions; it holds no state.
pub struct AnalysisUtils;

552impl AnalysisUtils {
553    /// Calculate code duplication (simplified)
554    pub fn calculate_duplication(tree: &SyntaxTree) -> f64 {
555        // Implement a more sophisticated duplication analysis
556        // This looks for similar code structures in the tree
557
558        // Traverse the tree and count nodes
559        fn traverse_node(
560            node: &SyntaxNode,
561            node_counts: &mut std::collections::HashMap<String, usize>,
562        ) -> usize {
563            let mut count = 1; // Count this node
564
565            // Create a signature for this node based on its kind and children
566            let mut signature = node.kind.clone();
567
568            // Add children signatures
569            for child in &node.children {
570                let child_count = traverse_node(child, node_counts);
571                count += child_count;
572                signature.push_str(&format!("_{}", child.kind));
573            }
574
575            // Update the count for this signature
576            *node_counts.entry(signature).or_insert(0) += 1;
577
578            count
579        }
580
581        let mut node_counts = std::collections::HashMap::new();
582        let total_nodes = traverse_node(&tree.root, &mut node_counts);
583
584        // Count how many patterns appear more than once
585        let duplicate_patterns = node_counts.values().filter(|&&count| count > 1).count();
586
587        // Calculate duplication ratio
588        if total_nodes == 0 {
589            0.0
590        } else {
591            (duplicate_patterns as f64 / total_nodes as f64) * 100.0
592        }
593    }
594
595    /// Analyze code maintainability index
596    pub fn calculate_maintainability_index(analysis: &CodeAnalysis) -> f64 {
597        let metrics = &analysis.metrics;
598        let complexity = &analysis.complexity;
599
600        // Simplified maintainability index calculation
601        let halstead_volume =
602            metrics.lines_of_code as f64 * (metrics.functions_count as f64).log2();
603        let cyclomatic_complexity = complexity.cyclomatic_complexity as f64;
604        let lines_of_code = metrics.lines_of_code as f64;
605
606        let mi = 171.0
607            - 5.2 * halstead_volume.log2()
608            - 0.23 * cyclomatic_complexity
609            - 16.2 * lines_of_code.log2();
610        mi.max(0.0).min(171.0) // Clamp between 0 and 171
611    }
612
613    /// Generate code quality score
614    pub fn calculate_quality_score(analysis: &CodeAnalysis) -> f64 {
615        let mut score: f64 = 100.0;
616
617        // Deduct points for issues
618        for issue in &analysis.issues {
619            match issue.level {
620                IssueLevel::Error => score -= 20.0,
621                IssueLevel::Warning => score -= 10.0,
622                IssueLevel::Info => score -= 2.0,
623            }
624        }
625
626        // Bonus for good practices
627        if analysis.metrics.comment_ratio > 0.1 {
628            score += 5.0;
629        }
630
631        if analysis.complexity.cyclomatic_complexity < 5 {
632            score += 10.0;
633        }
634
635        score.max(0.0).min(100.0)
636    }
637}