infiniloom_engine/analysis/
complexity.rs

1//! Code complexity metrics calculation for all supported languages
2//!
3//! Computes cyclomatic complexity, cognitive complexity, Halstead metrics,
4//! and maintainability index for functions/methods.
5
6use crate::analysis::types::{ComplexityMetrics, HalsteadMetrics, LocMetrics};
7use crate::parser::Language;
8use std::collections::HashSet;
9use tree_sitter::Node;
10
11/// Calculates complexity metrics from AST nodes
12pub struct ComplexityCalculator {
13    /// Source code being analyzed
14    source: String,
15}
16
17impl ComplexityCalculator {
18    /// Create a new calculator with the given source code
19    pub fn new(source: impl Into<String>) -> Self {
20        Self {
21            source: source.into(),
22        }
23    }
24
25    /// Get text for a node
26    fn node_text(&self, node: &Node) -> &str {
27        node.utf8_text(self.source.as_bytes()).unwrap_or("")
28    }
29
30    /// Calculate all complexity metrics for a function node
31    pub fn calculate(&self, node: &Node, language: Language) -> ComplexityMetrics {
32        let cyclomatic = self.cyclomatic_complexity(node, language);
33        let cognitive = self.cognitive_complexity(node, language);
34        let halstead = self.halstead_metrics(node, language);
35        let loc = self.loc_metrics(node);
36        let max_nesting_depth = self.max_nesting_depth(node, language);
37        let parameter_count = self.parameter_count(node, language);
38        let return_count = self.return_count(node, language);
39
40        // Calculate maintainability index (MI)
41        // Formula: MI = 171 - 5.2 * ln(V) - 0.23 * CC - 16.2 * ln(LOC)
42        // Where V = Halstead Volume, CC = Cyclomatic Complexity, LOC = Lines of Code
43        let maintainability_index = halstead.as_ref().map(|h| {
44            let v = h.volume.max(1.0);
45            let cc = cyclomatic as f32;
46            let loc = loc.source.max(1) as f32;
47
48            let mi = 171.0 - 5.2 * v.ln() - 0.23 * cc - 16.2 * loc.ln();
49            // Normalize to 0-100 scale
50            (mi.max(0.0) * 100.0 / 171.0).min(100.0)
51        });
52
53        ComplexityMetrics {
54            cyclomatic,
55            cognitive,
56            halstead,
57            loc,
58            maintainability_index,
59            max_nesting_depth,
60            parameter_count,
61            return_count,
62        }
63    }
64
65    /// Calculate cyclomatic complexity (McCabe's complexity)
66    ///
67    /// CC = E - N + 2P (for a single function, P=1)
68    /// Simplified: CC = 1 + number of decision points
69    ///
70    /// Decision points: if, else if, while, for, case, catch, &&, ||, ?:
71    pub fn cyclomatic_complexity(&self, node: &Node, language: Language) -> u32 {
72        let mut complexity = 1; // Base complexity
73
74        self.walk_tree(node, &mut |child| {
75            if self.is_decision_point(child, language) {
76                complexity += 1;
77            }
78        });
79
80        complexity
81    }
82
83    /// Check if a node is a decision point (contributes to cyclomatic complexity)
84    fn is_decision_point(&self, node: &Node, language: Language) -> bool {
85        let kind = node.kind();
86
87        // Language-agnostic decision points
88        let common_decisions = [
89            "if_statement",
90            "if_expression",
91            "if",
92            "else_if",
93            "elif",
94            "elsif",
95            "while_statement",
96            "while_expression",
97            "while",
98            "for_statement",
99            "for_expression",
100            "for",
101            "for_in_statement",
102            "foreach",
103            "case",
104            "when",
105            "match_arm",
106            "catch_clause",
107            "except_clause",
108            "rescue",
109            "conditional_expression", // ternary
110            "ternary_expression",
111            "binary_expression",
112            "logical_and",
113            "logical_or",
114        ];
115
116        if common_decisions.contains(&kind) {
117            return true;
118        }
119
120        // Check for && and || operators in binary expressions
121        if kind == "binary_expression" || kind == "binary_operator" {
122            let text = self.node_text(node);
123            if text.contains("&&") || text.contains("||") || text.contains(" and ") || text.contains(" or ") {
124                return true;
125            }
126        }
127
128        // Language-specific decision points
129        match language {
130            Language::Rust => matches!(
131                kind,
132                "match_expression" | "if_let_expression" | "while_let_expression"
133            ),
134            Language::Go => matches!(kind, "select_statement" | "type_switch_statement"),
135            Language::Swift => matches!(kind, "guard_statement" | "switch_statement"),
136            Language::Kotlin => matches!(kind, "when_expression"),
137            Language::Haskell => matches!(kind, "case_expression" | "guard"),
138            Language::Elixir => matches!(kind, "case" | "cond" | "with"),
139            Language::Clojure => matches!(kind, "cond" | "case"),
140            Language::OCaml => matches!(kind, "match_expression"),
141            _ => false,
142        }
143    }
144
145    /// Calculate cognitive complexity
146    ///
147    /// Cognitive complexity measures how hard code is to understand.
148    /// It penalizes nesting, breaks in linear flow, and complex control structures.
149    pub fn cognitive_complexity(&self, node: &Node, language: Language) -> u32 {
150        let mut complexity = 0;
151        self.cognitive_walk(node, language, 0, &mut complexity);
152        complexity
153    }
154
155    fn cognitive_walk(&self, node: &Node, language: Language, nesting: u32, complexity: &mut u32) {
156        let kind = node.kind();
157
158        // Increment for control flow structures
159        let is_control_flow = self.is_control_flow(kind, language);
160        if is_control_flow {
161            // Base increment
162            *complexity += 1;
163            // Nesting increment
164            *complexity += nesting;
165        }
166
167        // Increment for breaks in linear flow
168        if self.is_flow_break(kind, language) {
169            *complexity += 1;
170        }
171
172        // Recursion penalty
173        if self.is_recursion(node, language) {
174            *complexity += 1;
175        }
176
177        // Walk children with updated nesting
178        let new_nesting = if is_control_flow || self.is_nesting_structure(kind, language) {
179            nesting + 1
180        } else {
181            nesting
182        };
183
184        let mut cursor = node.walk();
185        for child in node.children(&mut cursor) {
186            self.cognitive_walk(&child, language, new_nesting, complexity);
187        }
188    }
189
190    fn is_control_flow(&self, kind: &str, language: Language) -> bool {
191        let common_control = [
192            "if_statement",
193            "if_expression",
194            "while_statement",
195            "while_expression",
196            "for_statement",
197            "for_expression",
198            "for_in_statement",
199            "switch_statement",
200            "match_expression",
201            "try_statement",
202        ];
203
204        if common_control.contains(&kind) {
205            return true;
206        }
207
208        match language {
209            Language::Rust => matches!(kind, "if_let_expression" | "while_let_expression"),
210            Language::Go => matches!(kind, "select_statement"),
211            Language::Swift => matches!(kind, "guard_statement"),
212            _ => false,
213        }
214    }
215
216    fn is_flow_break(&self, kind: &str, _language: Language) -> bool {
217        matches!(
218            kind,
219            "break_statement"
220                | "continue_statement"
221                | "goto_statement"
222                | "return_statement"
223                | "throw_statement"
224                | "raise"
225        )
226    }
227
228    fn is_nesting_structure(&self, kind: &str, _language: Language) -> bool {
229        matches!(
230            kind,
231            "lambda_expression"
232                | "anonymous_function"
233                | "closure_expression"
234                | "block"
235                | "arrow_function"
236                | "function_expression"
237        )
238    }
239
240    fn is_recursion(&self, node: &Node, _language: Language) -> bool {
241        // Check if this node is a function call to the current function
242        // This is a simplified check - full recursion detection would need function context
243        if node.kind() == "call_expression" || node.kind() == "function_call" {
244            // Would need to compare called function name with enclosing function name
245            // For now, return false - this would need more context
246        }
247        false
248    }
249
250    /// Calculate Halstead complexity metrics
251    pub fn halstead_metrics(&self, node: &Node, language: Language) -> Option<HalsteadMetrics> {
252        let mut operators = HashSet::new();
253        let mut operands = HashSet::new();
254        let mut total_operators = 0u32;
255        let mut total_operands = 0u32;
256
257        self.walk_tree(node, &mut |child| {
258            let kind = child.kind();
259            let text = self.node_text(child);
260
261            if self.is_operator(kind, language) {
262                operators.insert(text.to_string());
263                total_operators += 1;
264            } else if self.is_operand(kind, language) {
265                operands.insert(text.to_string());
266                total_operands += 1;
267            }
268        });
269
270        let n1 = operators.len() as u32; // distinct operators
271        let n2 = operands.len() as u32; // distinct operands
272        let nn1 = total_operators; // total operators
273        let nn2 = total_operands; // total operands
274
275        if n1 == 0 || n2 == 0 {
276            return None;
277        }
278
279        let vocabulary = n1 + n2;
280        let length = nn1 + nn2;
281
282        // Calculated length: n1 * log2(n1) + n2 * log2(n2)
283        let calculated_length =
284            (n1 as f32) * (n1 as f32).log2() + (n2 as f32) * (n2 as f32).log2();
285
286        // Volume: N * log2(n)
287        let volume = (length as f32) * (vocabulary as f32).log2();
288
289        // Difficulty: (n1/2) * (N2/n2)
290        let difficulty = ((n1 as f32) / 2.0) * ((nn2 as f32) / (n2 as f32).max(1.0));
291
292        // Effort: D * V
293        let effort = difficulty * volume;
294
295        // Time to program: E / 18 (seconds)
296        let time = effort / 18.0;
297
298        // Estimated bugs: V / 3000
299        let bugs = volume / 3000.0;
300
301        Some(HalsteadMetrics {
302            distinct_operators: n1,
303            distinct_operands: n2,
304            total_operators: nn1,
305            total_operands: nn2,
306            vocabulary,
307            length,
308            calculated_length,
309            volume,
310            difficulty,
311            effort,
312            time,
313            bugs,
314        })
315    }
316
317    fn is_operator(&self, kind: &str, _language: Language) -> bool {
318        matches!(
319            kind,
320            "binary_operator"
321                | "unary_operator"
322                | "assignment_operator"
323                | "comparison_operator"
324                | "arithmetic_operator"
325                | "logical_operator"
326                | "bitwise_operator"
327                | "+"
328                | "-"
329                | "*"
330                | "/"
331                | "%"
332                | "="
333                | "=="
334                | "!="
335                | "<"
336                | ">"
337                | "<="
338                | ">="
339                | "&&"
340                | "||"
341                | "!"
342                | "&"
343                | "|"
344                | "^"
345                | "~"
346                | "<<"
347                | ">>"
348                | "+="
349                | "-="
350                | "*="
351                | "/="
352                | "."
353                | "->"
354                | "::"
355                | "?"
356                | ":"
357        )
358    }
359
360    fn is_operand(&self, kind: &str, _language: Language) -> bool {
361        matches!(
362            kind,
363            "identifier"
364                | "number"
365                | "integer"
366                | "float"
367                | "string"
368                | "string_literal"
369                | "number_literal"
370                | "integer_literal"
371                | "float_literal"
372                | "boolean"
373                | "true"
374                | "false"
375                | "nil"
376                | "null"
377                | "none"
378        )
379    }
380
381    /// Calculate lines of code metrics
382    pub fn loc_metrics(&self, node: &Node) -> LocMetrics {
383        let text = self.node_text(node);
384        let lines: Vec<&str> = text.lines().collect();
385
386        let mut source = 0u32;
387        let mut comments = 0u32;
388        let mut blank = 0u32;
389
390        for line in &lines {
391            let trimmed = line.trim();
392            if trimmed.is_empty() {
393                blank += 1;
394            } else if self.is_comment_line(trimmed) {
395                comments += 1;
396            } else {
397                source += 1;
398            }
399        }
400
401        LocMetrics {
402            total: lines.len() as u32,
403            source,
404            comments,
405            blank,
406        }
407    }
408
409    fn is_comment_line(&self, line: &str) -> bool {
410        line.starts_with("//")
411            || line.starts_with('#')
412            || line.starts_with("/*")
413            || line.starts_with('*')
414            || line.starts_with("*/")
415            || line.starts_with("--")
416            || line.starts_with(";;")
417            || line.starts_with("\"\"\"")
418            || line.starts_with("'''")
419    }
420
421    /// Calculate maximum nesting depth
422    pub fn max_nesting_depth(&self, node: &Node, language: Language) -> u32 {
423        let mut max_depth = 0;
424        self.nesting_walk(node, language, 0, &mut max_depth);
425        max_depth
426    }
427
428    fn nesting_walk(&self, node: &Node, language: Language, depth: u32, max_depth: &mut u32) {
429        let kind = node.kind();
430
431        let is_nesting = self.is_control_flow(kind, language)
432            || self.is_nesting_structure(kind, language);
433
434        let new_depth = if is_nesting { depth + 1 } else { depth };
435
436        if new_depth > *max_depth {
437            *max_depth = new_depth;
438        }
439
440        let mut cursor = node.walk();
441        for child in node.children(&mut cursor) {
442            self.nesting_walk(&child, language, new_depth, max_depth);
443        }
444    }
445
446    /// Count number of parameters
447    pub fn parameter_count(&self, node: &Node, _language: Language) -> u32 {
448        let mut count = 0;
449
450        // Find parameters node
451        if let Some(params) = node.child_by_field_name("parameters") {
452            let mut cursor = params.walk();
453            for child in params.children(&mut cursor) {
454                let kind = child.kind();
455                if kind.contains("parameter")
456                    || kind == "identifier"
457                    || kind == "typed_parameter"
458                    || kind == "formal_parameter"
459                {
460                    count += 1;
461                }
462            }
463        }
464
465        count
466    }
467
468    /// Count number of return statements
469    pub fn return_count(&self, node: &Node, _language: Language) -> u32 {
470        let mut count = 0;
471
472        self.walk_tree(node, &mut |child| {
473            if child.kind() == "return_statement" || child.kind() == "return" {
474                count += 1;
475            }
476        });
477
478        // If no explicit return but function has expression body, count as 1
479        if count == 0 {
480            count = 1;
481        }
482
483        count
484    }
485
486    /// Walk tree and apply callback to each node
487    fn walk_tree<F>(&self, node: &Node, callback: &mut F)
488    where
489        F: FnMut(&Node),
490    {
491        callback(node);
492
493        let mut cursor = node.walk();
494        for child in node.children(&mut cursor) {
495            self.walk_tree(&child, callback);
496        }
497    }
498}
499
500/// Calculate complexity for a function given its source code
501pub fn calculate_complexity(source: &str, node: &Node, language: Language) -> ComplexityMetrics {
502    let calculator = ComplexityCalculator::new(source);
503    calculator.calculate(node, language)
504}
505
506/// Calculate complexity for source code without needing a tree-sitter node
507///
508/// This is a convenience function that handles the parsing internally.
509/// Returns an error if the source cannot be parsed.
510pub fn calculate_complexity_from_source(
511    source: &str,
512    language: Language,
513) -> Result<ComplexityMetrics, String> {
514    // Get tree-sitter language for parsing
515    let ts_language = match language {
516        Language::Python => tree_sitter_python::LANGUAGE.into(),
517        Language::JavaScript => tree_sitter_javascript::LANGUAGE.into(),
518        Language::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
519        Language::Rust => tree_sitter_rust::LANGUAGE.into(),
520        Language::Go => tree_sitter_go::LANGUAGE.into(),
521        Language::Java => tree_sitter_java::LANGUAGE.into(),
522        Language::C => tree_sitter_c::LANGUAGE.into(),
523        Language::Cpp => tree_sitter_cpp::LANGUAGE.into(),
524        Language::CSharp => tree_sitter_c_sharp::LANGUAGE.into(),
525        Language::Ruby => tree_sitter_ruby::LANGUAGE.into(),
526        Language::Php => tree_sitter_php::LANGUAGE_PHP.into(),
527        Language::Swift => tree_sitter_swift::LANGUAGE.into(),
528        Language::Kotlin => tree_sitter_kotlin_ng::LANGUAGE.into(),
529        Language::Scala => tree_sitter_scala::LANGUAGE.into(),
530        Language::Haskell => tree_sitter_haskell::LANGUAGE.into(),
531        Language::Elixir => tree_sitter_elixir::LANGUAGE.into(),
532        Language::Clojure => tree_sitter_clojure::LANGUAGE.into(),
533        Language::OCaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(),
534        Language::Lua => tree_sitter_lua::LANGUAGE.into(),
535        Language::R => tree_sitter_r::LANGUAGE.into(),
536        Language::Bash => tree_sitter_bash::LANGUAGE.into(),
537        // FSharp doesn't have tree-sitter support yet
538        Language::FSharp => return Err("F# complexity analysis not yet supported (no tree-sitter parser available)".to_string()),
539    };
540
541    let mut ts_parser = tree_sitter::Parser::new();
542    ts_parser
543        .set_language(&ts_language)
544        .map_err(|e| format!("Failed to set language: {}", e))?;
545
546    let tree = ts_parser
547        .parse(source, None)
548        .ok_or_else(|| "Failed to parse source code".to_string())?;
549
550    let calculator = ComplexityCalculator::new(source);
551    Ok(calculator.calculate(&tree.root_node(), language))
552}
553
554/// Thresholds for complexity warnings
555#[derive(Debug, Clone, Copy)]
556pub struct ComplexityThresholds {
557    /// Cyclomatic complexity warning threshold
558    pub cyclomatic_warn: u32,
559    /// Cyclomatic complexity error threshold
560    pub cyclomatic_error: u32,
561    /// Cognitive complexity warning threshold
562    pub cognitive_warn: u32,
563    /// Cognitive complexity error threshold
564    pub cognitive_error: u32,
565    /// Max nesting depth warning threshold
566    pub nesting_warn: u32,
567    /// Max nesting depth error threshold
568    pub nesting_error: u32,
569    /// Max parameter count warning threshold
570    pub params_warn: u32,
571    /// Max parameter count error threshold
572    pub params_error: u32,
573    /// Maintainability index warning threshold (below this)
574    pub maintainability_warn: f32,
575    /// Maintainability index error threshold (below this)
576    pub maintainability_error: f32,
577}
578
579impl Default for ComplexityThresholds {
580    fn default() -> Self {
581        Self {
582            cyclomatic_warn: 10,
583            cyclomatic_error: 20,
584            cognitive_warn: 15,
585            cognitive_error: 30,
586            nesting_warn: 4,
587            nesting_error: 6,
588            params_warn: 5,
589            params_error: 8,
590            maintainability_warn: 40.0,
591            maintainability_error: 20.0,
592        }
593    }
594}
595
596/// Severity of a complexity issue
597#[derive(Debug, Clone, Copy, PartialEq, Eq)]
598pub enum ComplexitySeverity {
599    Ok,
600    Warning,
601    Error,
602}
603
604/// Check complexity metrics against thresholds
605pub fn check_complexity(
606    metrics: &ComplexityMetrics,
607    thresholds: &ComplexityThresholds,
608) -> Vec<(String, ComplexitySeverity)> {
609    let mut issues = Vec::new();
610
611    // Cyclomatic complexity
612    if metrics.cyclomatic >= thresholds.cyclomatic_error {
613        issues.push((
614            format!(
615                "Cyclomatic complexity {} exceeds error threshold {}",
616                metrics.cyclomatic, thresholds.cyclomatic_error
617            ),
618            ComplexitySeverity::Error,
619        ));
620    } else if metrics.cyclomatic >= thresholds.cyclomatic_warn {
621        issues.push((
622            format!(
623                "Cyclomatic complexity {} exceeds warning threshold {}",
624                metrics.cyclomatic, thresholds.cyclomatic_warn
625            ),
626            ComplexitySeverity::Warning,
627        ));
628    }
629
630    // Cognitive complexity
631    if metrics.cognitive >= thresholds.cognitive_error {
632        issues.push((
633            format!(
634                "Cognitive complexity {} exceeds error threshold {}",
635                metrics.cognitive, thresholds.cognitive_error
636            ),
637            ComplexitySeverity::Error,
638        ));
639    } else if metrics.cognitive >= thresholds.cognitive_warn {
640        issues.push((
641            format!(
642                "Cognitive complexity {} exceeds warning threshold {}",
643                metrics.cognitive, thresholds.cognitive_warn
644            ),
645            ComplexitySeverity::Warning,
646        ));
647    }
648
649    // Nesting depth
650    if metrics.max_nesting_depth >= thresholds.nesting_error {
651        issues.push((
652            format!(
653                "Nesting depth {} exceeds error threshold {}",
654                metrics.max_nesting_depth, thresholds.nesting_error
655            ),
656            ComplexitySeverity::Error,
657        ));
658    } else if metrics.max_nesting_depth >= thresholds.nesting_warn {
659        issues.push((
660            format!(
661                "Nesting depth {} exceeds warning threshold {}",
662                metrics.max_nesting_depth, thresholds.nesting_warn
663            ),
664            ComplexitySeverity::Warning,
665        ));
666    }
667
668    // Parameter count
669    if metrics.parameter_count >= thresholds.params_error {
670        issues.push((
671            format!(
672                "Parameter count {} exceeds error threshold {}",
673                metrics.parameter_count, thresholds.params_error
674            ),
675            ComplexitySeverity::Error,
676        ));
677    } else if metrics.parameter_count >= thresholds.params_warn {
678        issues.push((
679            format!(
680                "Parameter count {} exceeds warning threshold {}",
681                metrics.parameter_count, thresholds.params_warn
682            ),
683            ComplexitySeverity::Warning,
684        ));
685    }
686
687    // Maintainability index
688    if let Some(mi) = metrics.maintainability_index {
689        if mi <= thresholds.maintainability_error {
690            issues.push((
691                format!(
692                    "Maintainability index {:.1} below error threshold {}",
693                    mi, thresholds.maintainability_error
694                ),
695                ComplexitySeverity::Error,
696            ));
697        } else if mi <= thresholds.maintainability_warn {
698            issues.push((
699                format!(
700                    "Maintainability index {:.1} below warning threshold {}",
701                    mi, thresholds.maintainability_warn
702                ),
703                ComplexitySeverity::Warning,
704            ));
705        }
706    }
707
708    issues
709}
710
711#[cfg(test)]
712mod tests {
713    use super::*;
714
715    #[test]
716    fn test_loc_metrics() {
717        let source = r#"
718fn example() {
719    // Comment
720    let x = 1;
721
722    /* Multi-line
723     * comment */
724    let y = 2;
725}
726"#;
727        let calculator = ComplexityCalculator::new(source);
728        // Note: This would need actual tree-sitter node for proper test
729        // For now, test the helper
730        assert!(calculator.is_comment_line("// Comment"));
731        assert!(calculator.is_comment_line("/* Multi-line"));
732        assert!(!calculator.is_comment_line("let x = 1;"));
733    }
734
735    #[test]
736    fn test_thresholds_default() {
737        let thresholds = ComplexityThresholds::default();
738        assert_eq!(thresholds.cyclomatic_warn, 10);
739        assert_eq!(thresholds.cognitive_warn, 15);
740    }
741
742    #[test]
743    fn test_check_complexity() {
744        let metrics = ComplexityMetrics {
745            cyclomatic: 25,
746            cognitive: 35,
747            max_nesting_depth: 7,
748            parameter_count: 10,
749            maintainability_index: Some(15.0),
750            ..Default::default()
751        };
752
753        let thresholds = ComplexityThresholds::default();
754        let issues = check_complexity(&metrics, &thresholds);
755
756        // Should have errors for all metrics
757        assert!(issues.len() >= 4);
758        assert!(issues.iter().any(|(msg, sev)| {
759            msg.contains("Cyclomatic") && *sev == ComplexitySeverity::Error
760        }));
761    }
762}