vtcode_core/tools/tree_sitter/
analysis.rs

1//! Code analysis capabilities using tree-sitter
2
3use crate::tools::tree_sitter::analyzer::{LanguageSupport, Position, SyntaxNode, SyntaxTree};
4use crate::tools::tree_sitter::languages::{LanguageAnalyzer, SymbolInfo, SymbolKind};
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7
8/// Comprehensive code analysis result
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct CodeAnalysis {
11    pub file_path: String,
12    pub language: LanguageSupport,
13    pub metrics: CodeMetrics,
14    pub symbols: Vec<SymbolInfo>,
15    pub dependencies: Vec<DependencyInfo>,
16    pub issues: Vec<AnalysisIssue>,
17    pub complexity: ComplexityMetrics,
18    pub structure: CodeStructure,
19}
20
21/// Code metrics
22#[derive(Debug, Clone, Serialize, Deserialize, Default)]
23pub struct CodeMetrics {
24    pub lines_of_code: usize,
25    pub lines_of_comments: usize,
26    pub blank_lines: usize,
27    pub functions_count: usize,
28    pub classes_count: usize,
29    pub variables_count: usize,
30    pub imports_count: usize,
31    pub comment_ratio: f64,
32}
33
34/// Dependency information
35#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct DependencyInfo {
37    pub name: String,
38    pub kind: DependencyKind,
39    pub source: String,
40    pub position: Position,
41}
42
43#[derive(Debug, Clone, Serialize, Deserialize)]
44pub enum DependencyKind {
45    Import,
46    Package,
47    Module,
48    External,
49}
50
51/// Analysis issues and suggestions
52#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct AnalysisIssue {
54    pub level: IssueLevel,
55    pub category: IssueCategory,
56    pub message: String,
57    pub position: Position,
58    pub suggestion: Option<String>,
59}
60
61#[derive(Debug, Clone, Serialize, Deserialize)]
62pub enum IssueLevel {
63    Info,
64    Warning,
65    Error,
66}
67
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub enum IssueCategory {
70    Style,
71    Performance,
72    Security,
73    Complexity,
74    Maintainability,
75}
76
77/// Code complexity metrics
78#[derive(Debug, Clone, Serialize, Deserialize, Default)]
79pub struct ComplexityMetrics {
80    pub cyclomatic_complexity: usize,
81    pub cognitive_complexity: usize,
82    pub nesting_depth: usize,
83    pub function_length_average: f64,
84    pub function_length_max: usize,
85    pub parameters_average: f64,
86    pub parameters_max: usize,
87}
88
89/// Code structure information
90#[derive(Debug, Clone, Serialize, Deserialize, Default)]
91pub struct CodeStructure {
92    pub modules: Vec<String>,
93    pub functions: Vec<String>,
94    pub classes: Vec<String>,
95    pub hierarchy: HashMap<String, Vec<String>>, // parent -> children
96}
97
98/// Code analyzer using tree-sitter
99pub struct CodeAnalyzer {
100    language_analyzer: LanguageAnalyzer,
101}
102
103impl CodeAnalyzer {
104    pub fn new(language: &LanguageSupport) -> Self {
105        Self {
106            language_analyzer: LanguageAnalyzer::new(language),
107        }
108    }
109
110    /// Perform comprehensive code analysis
111    pub fn analyze(&self, syntax_tree: &SyntaxTree, file_path: &str) -> CodeAnalysis {
112        let symbols = self.language_analyzer.extract_symbols(syntax_tree);
113        let metrics = self.calculate_metrics(syntax_tree, &symbols);
114        let dependencies = self.extract_dependencies(syntax_tree);
115        let issues = self.analyze_issues(syntax_tree, &symbols);
116        let complexity = self.calculate_complexity(syntax_tree, &symbols);
117        let structure = self.analyze_structure(&symbols);
118
119        CodeAnalysis {
120            file_path: file_path.to_string(),
121            language: syntax_tree.language.clone(),
122            metrics,
123            symbols,
124            dependencies,
125            issues,
126            complexity,
127            structure,
128        }
129    }
130
131    /// Calculate basic code metrics
132    fn calculate_metrics(&self, tree: &SyntaxTree, symbols: &[SymbolInfo]) -> CodeMetrics {
133        let lines = tree.source_code.lines().collect::<Vec<_>>();
134        let total_lines = lines.len();
135
136        let mut comment_lines = 0;
137        let mut blank_lines = 0;
138
139        for line in &lines {
140            let trimmed = line.trim();
141            if trimmed.is_empty() {
142                blank_lines += 1;
143            } else if trimmed.starts_with("//")
144                || trimmed.starts_with("/*")
145                || trimmed.starts_with("#")
146                || trimmed.starts_with("'''")
147            {
148                comment_lines += 1;
149            }
150        }
151
152        let code_lines = total_lines - comment_lines - blank_lines;
153        let comment_ratio = if code_lines > 0 {
154            comment_lines as f64 / code_lines as f64
155        } else {
156            0.0
157        };
158
159        let functions_count = symbols
160            .iter()
161            .filter(|s| matches!(s.kind, SymbolKind::Function | SymbolKind::Method))
162            .count();
163        let classes_count = symbols
164            .iter()
165            .filter(|s| {
166                matches!(
167                    s.kind,
168                    SymbolKind::Class | SymbolKind::Struct | SymbolKind::Interface
169                )
170            })
171            .count();
172        let variables_count = symbols
173            .iter()
174            .filter(|s| matches!(s.kind, SymbolKind::Variable))
175            .count();
176        let imports_count = symbols
177            .iter()
178            .filter(|s| matches!(s.kind, SymbolKind::Import))
179            .count();
180
181        CodeMetrics {
182            lines_of_code: code_lines,
183            lines_of_comments: comment_lines,
184            blank_lines,
185            functions_count,
186            classes_count,
187            variables_count,
188            imports_count,
189            comment_ratio,
190        }
191    }
192
193    /// Extract dependencies from the code
194    fn extract_dependencies(&self, tree: &SyntaxTree) -> Vec<DependencyInfo> {
195        let mut dependencies = Vec::new();
196
197        // Extract imports based on language
198        match tree.language {
199            LanguageSupport::Rust => {
200                self.extract_rust_dependencies(&tree.root, &mut dependencies);
201            }
202            LanguageSupport::Python => {
203                self.extract_python_dependencies(&tree.root, &mut dependencies);
204            }
205            LanguageSupport::JavaScript | LanguageSupport::TypeScript => {
206                self.extract_js_dependencies(&tree.root, &mut dependencies);
207            }
208            LanguageSupport::Go => {
209                self.extract_go_dependencies(&tree.root, &mut dependencies);
210            }
211            LanguageSupport::Java => {
212                self.extract_java_dependencies(&tree.root, &mut dependencies);
213            }
214            LanguageSupport::Swift => {
215                self.extract_swift_dependencies(&tree.root, &mut dependencies);
216            }
217        }
218
219        dependencies
220    }
221
222    fn extract_rust_dependencies(&self, node: &SyntaxNode, deps: &mut Vec<DependencyInfo>) {
223        if node.kind == "use_declaration" {
224            if let Some(path_node) = node
225                .named_children
226                .get("argument")
227                .and_then(|children| children.first())
228            {
229                deps.push(DependencyInfo {
230                    name: path_node.text.clone(),
231                    kind: DependencyKind::Import,
232                    source: "use".to_string(),
233                    position: path_node.start_position.clone(),
234                });
235            }
236        }
237
238        for child in &node.children {
239            self.extract_rust_dependencies(child, deps);
240        }
241    }
242
243    fn extract_python_dependencies(&self, node: &SyntaxNode, deps: &mut Vec<DependencyInfo>) {
244        if node.kind == "import_statement" || node.kind == "import_from_statement" {
245            for child in &node.children {
246                if child.kind == "dotted_name" {
247                    deps.push(DependencyInfo {
248                        name: child.text.clone(),
249                        kind: DependencyKind::Import,
250                        source: "import".to_string(),
251                        position: child.start_position.clone(),
252                    });
253                }
254            }
255        }
256
257        for child in &node.children {
258            self.extract_python_dependencies(child, deps);
259        }
260    }
261
262    fn extract_js_dependencies(&self, node: &SyntaxNode, deps: &mut Vec<DependencyInfo>) {
263        if node.kind == "import_statement" {
264            for child in &node.children {
265                if child.kind == "string" {
266                    deps.push(DependencyInfo {
267                        name: child.text.clone(),
268                        kind: DependencyKind::Import,
269                        source: "import".to_string(),
270                        position: child.start_position.clone(),
271                    });
272                }
273            }
274        }
275
276        for child in &node.children {
277            self.extract_js_dependencies(child, deps);
278        }
279    }
280
281    fn extract_go_dependencies(&self, node: &SyntaxNode, deps: &mut Vec<DependencyInfo>) {
282        if node.kind == "import_declaration" {
283            for child in &node.children {
284                if let Some(spec_node) = child.named_children.get("spec") {
285                    if let Some(path_node) =
286                        spec_node.first().and_then(|n| n.named_children.get("path"))
287                    {
288                        if let Some(string_node) = path_node.first() {
289                            deps.push(DependencyInfo {
290                                name: string_node.text.clone(),
291                                kind: DependencyKind::Import,
292                                source: "import".to_string(),
293                                position: string_node.start_position.clone(),
294                            });
295                        }
296                    }
297                }
298            }
299        }
300
301        for child in &node.children {
302            self.extract_go_dependencies(child, deps);
303        }
304    }
305
306    fn extract_java_dependencies(&self, node: &SyntaxNode, deps: &mut Vec<DependencyInfo>) {
307        if node.kind == "import_declaration" {
308            for child in &node.children {
309                if let Some(name_node) = child.named_children.get("qualified_name") {
310                    if let Some(name) = name_node.first() {
311                        deps.push(DependencyInfo {
312                            name: name.text.clone(),
313                            kind: DependencyKind::Import,
314                            source: "import".to_string(),
315                            position: name.start_position.clone(),
316                        });
317                    }
318                }
319            }
320        }
321
322        for child in &node.children {
323            self.extract_java_dependencies(child, deps);
324        }
325    }
326
327    #[allow(dead_code)]
328    fn extract_swift_dependencies(&self, node: &SyntaxNode, deps: &mut Vec<DependencyInfo>) {
329        if node.kind == "import_declaration" {
330            for child in &node.children {
331                if let Some(path_node) = child.named_children.get("path") {
332                    if let Some(path) = path_node.first() {
333                        deps.push(DependencyInfo {
334                            name: path.text.clone(),
335                            kind: DependencyKind::Import,
336                            source: "import".to_string(),
337                            position: path.start_position.clone(),
338                        });
339                    }
340                }
341            }
342        }
343
344        for child in &node.children {
345            self.extract_swift_dependencies(child, deps);
346        }
347    }
348
349    /// Analyze code for potential issues
350    fn analyze_issues(&self, tree: &SyntaxTree, symbols: &[SymbolInfo]) -> Vec<AnalysisIssue> {
351        let mut issues = Vec::new();
352
353        // Check for long functions
354        for symbol in symbols {
355            if matches!(symbol.kind, SymbolKind::Function | SymbolKind::Method) {
356                if let Some(signature) = &symbol.signature {
357                    if signature.len() > 100 {
358                        issues.push(AnalysisIssue {
359                            level: IssueLevel::Info,
360                            category: IssueCategory::Maintainability,
361                            message: format!("Long function signature: {}", symbol.name),
362                            position: symbol.position.clone(),
363                            suggestion: Some(
364                                "Consider breaking down into smaller functions".to_string(),
365                            ),
366                        });
367                    }
368                }
369            }
370        }
371
372        // Check for high cyclomatic complexity (simplified)
373        let complexity = self.calculate_complexity(tree, symbols);
374        if complexity.cyclomatic_complexity > 10 {
375            issues.push(AnalysisIssue {
376                level: IssueLevel::Warning,
377                category: IssueCategory::Complexity,
378                message: format!(
379                    "High cyclomatic complexity: {}",
380                    complexity.cyclomatic_complexity
381                ),
382                position: Position {
383                    row: 0,
384                    column: 0,
385                    byte_offset: 0,
386                },
387                suggestion: Some("Consider refactoring to reduce complexity".to_string()),
388            });
389        }
390
391        // Check for missing documentation
392        for symbol in symbols {
393            if matches!(symbol.kind, SymbolKind::Function | SymbolKind::Class)
394                && symbol.documentation.is_none()
395            {
396                issues.push(AnalysisIssue {
397                    level: IssueLevel::Info,
398                    category: IssueCategory::Maintainability,
399                    message: format!("Missing documentation for: {}", symbol.name),
400                    position: symbol.position.clone(),
401                    suggestion: Some("Add documentation comments".to_string()),
402                });
403            }
404        }
405
406        issues
407    }
408
409    /// Calculate code complexity metrics
410    fn calculate_complexity(&self, tree: &SyntaxTree, symbols: &[SymbolInfo]) -> ComplexityMetrics {
411        let mut cyclomatic_complexity = 1; // Base complexity
412        let mut cognitive_complexity = 0;
413        let mut max_nesting_depth = 0;
414
415        // Calculate complexity based on language-specific constructs
416        self.calculate_language_complexity(
417            &tree.root,
418            &mut cyclomatic_complexity,
419            &mut cognitive_complexity,
420            0,
421            &mut max_nesting_depth,
422        );
423
424        let function_lengths: Vec<usize> = symbols
425            .iter()
426            .filter(|s| matches!(s.kind, SymbolKind::Function | SymbolKind::Method))
427            .filter_map(|s| s.signature.as_ref().map(|sig| sig.lines().count()))
428            .collect();
429
430        let function_length_average = if !function_lengths.is_empty() {
431            function_lengths.iter().sum::<usize>() as f64 / function_lengths.len() as f64
432        } else {
433            0.0
434        };
435
436        let function_length_max = function_lengths.iter().cloned().max().unwrap_or(0);
437
438        // Calculate parameter statistics
439        let parameter_counts: Vec<usize> = symbols
440            .iter()
441            .filter(|s| matches!(s.kind, SymbolKind::Function | SymbolKind::Method))
442            .filter_map(|s| s.signature.as_ref())
443            .filter_map(|sig| {
444                let start = sig.find('(')?;
445                let end = sig.find(')')?;
446                let params = &sig[start + 1..end];
447                Some(params.split(',').filter(|p| !p.trim().is_empty()).count())
448            })
449            .collect();
450
451        let parameters_average = if !parameter_counts.is_empty() {
452            parameter_counts.iter().sum::<usize>() as f64 / parameter_counts.len() as f64
453        } else {
454            0.0
455        };
456
457        let parameters_max = parameter_counts.iter().cloned().max().unwrap_or(0);
458
459        ComplexityMetrics {
460            cyclomatic_complexity,
461            cognitive_complexity,
462            nesting_depth: max_nesting_depth,
463            function_length_average,
464            function_length_max,
465            parameters_average,
466            parameters_max,
467        }
468    }
469
470    fn calculate_language_complexity(
471        &self,
472        node: &SyntaxNode,
473        cc: &mut usize,
474        cognitive: &mut usize,
475        depth: usize,
476        max_depth: &mut usize,
477    ) {
478        *max_depth = (*max_depth).max(depth);
479
480        // Language-specific complexity calculations
481        match node.kind.as_str() {
482            // Control flow increases cyclomatic complexity
483            k if k.contains("if") || k.contains("else") => {
484                *cc += 1;
485                *cognitive += 1;
486            }
487            k if k.contains("for") || k.contains("while") || k.contains("loop") => {
488                *cc += 1;
489                *cognitive += 2;
490            }
491            k if k.contains("switch") || k.contains("match") => {
492                *cc += node
493                    .named_children
494                    .get("body")
495                    .and_then(|children| Some(children.len().saturating_sub(1)))
496                    .unwrap_or(0);
497                *cognitive += 1;
498            }
499            k if k.contains("try") || k.contains("catch") => {
500                *cc += 1;
501                *cognitive += 1;
502            }
503            k if k.contains("function") || k.contains("method") => {
504                *cognitive += 1; // Function definition
505            }
506            _ => {}
507        }
508
509        // Recursively calculate for children
510        for child in &node.children {
511            self.calculate_language_complexity(child, cc, cognitive, depth + 1, max_depth);
512        }
513    }
514
515    /// Analyze code structure
516    fn analyze_structure(&self, symbols: &[SymbolInfo]) -> CodeStructure {
517        let mut modules = Vec::new();
518        let mut functions = Vec::new();
519        let mut classes = Vec::new();
520        let mut hierarchy = HashMap::new();
521
522        for symbol in symbols {
523            match &symbol.kind {
524                SymbolKind::Module => modules.push(symbol.name.clone()),
525                SymbolKind::Function => functions.push(symbol.name.clone()),
526                SymbolKind::Class | SymbolKind::Struct | SymbolKind::Interface => {
527                    classes.push(symbol.name.clone());
528                }
529                _ => {}
530            }
531
532            // Build hierarchy (simplified - in practice, this would be more sophisticated)
533            if let Some(scope) = &symbol.scope {
534                hierarchy
535                    .entry(scope.clone())
536                    .or_insert_with(Vec::new)
537                    .push(symbol.name.clone());
538            }
539        }
540
541        CodeStructure {
542            modules,
543            functions,
544            classes,
545            hierarchy,
546        }
547    }
548}
549
/// Namespace for stateless helpers that derive scores from a syntax tree or a
/// finished [`CodeAnalysis`].
pub struct AnalysisUtils;
552
553impl AnalysisUtils {
554    /// Calculate code duplication (simplified)
555    pub fn calculate_duplication(tree: &SyntaxTree) -> f64 {
556        // Implement a more sophisticated duplication analysis
557        // This looks for similar code structures in the tree
558
559        // Traverse the tree and count nodes
560        fn traverse_node(
561            node: &SyntaxNode,
562            node_counts: &mut std::collections::HashMap<String, usize>,
563        ) -> usize {
564            let mut count = 1; // Count this node
565
566            // Create a signature for this node based on its kind and children
567            let mut signature = node.kind.clone();
568
569            // Add children signatures
570            for child in &node.children {
571                let child_count = traverse_node(child, node_counts);
572                count += child_count;
573                signature.push_str(&format!("_{}", child.kind));
574            }
575
576            // Update the count for this signature
577            *node_counts.entry(signature).or_insert(0) += 1;
578
579            count
580        }
581
582        let mut node_counts = std::collections::HashMap::new();
583        let total_nodes = traverse_node(&tree.root, &mut node_counts);
584
585        // Count how many patterns appear more than once
586        let duplicate_patterns = node_counts.values().filter(|&&count| count > 1).count();
587
588        // Calculate duplication ratio
589        if total_nodes == 0 {
590            0.0
591        } else {
592            (duplicate_patterns as f64 / total_nodes as f64) * 100.0
593        }
594    }
595
596    /// Analyze code maintainability index
597    pub fn calculate_maintainability_index(analysis: &CodeAnalysis) -> f64 {
598        let metrics = &analysis.metrics;
599        let complexity = &analysis.complexity;
600
601        // Simplified maintainability index calculation
602        let halstead_volume =
603            metrics.lines_of_code as f64 * (metrics.functions_count as f64).log2();
604        let cyclomatic_complexity = complexity.cyclomatic_complexity as f64;
605        let lines_of_code = metrics.lines_of_code as f64;
606
607        let mi = 171.0
608            - 5.2 * halstead_volume.log2()
609            - 0.23 * cyclomatic_complexity
610            - 16.2 * lines_of_code.log2();
611        mi.max(0.0).min(171.0) // Clamp between 0 and 171
612    }
613
614    /// Generate code quality score
615    pub fn calculate_quality_score(analysis: &CodeAnalysis) -> f64 {
616        let mut score: f64 = 100.0;
617
618        // Deduct points for issues
619        for issue in &analysis.issues {
620            match issue.level {
621                IssueLevel::Error => score -= 20.0,
622                IssueLevel::Warning => score -= 10.0,
623                IssueLevel::Info => score -= 2.0,
624            }
625        }
626
627        // Bonus for good practices
628        if analysis.metrics.comment_ratio > 0.1 {
629            score += 5.0;
630        }
631
632        if analysis.complexity.cyclomatic_complexity < 5 {
633            score += 10.0;
634        }
635
636        score.max(0.0).min(100.0)
637    }
638}