infiniloom_engine/analysis/
complexity.rs

1//! Code complexity metrics calculation for all supported languages
2//!
3//! Computes cyclomatic complexity, cognitive complexity, Halstead metrics,
4//! and maintainability index for functions/methods.
5
6use crate::analysis::types::{ComplexityMetrics, HalsteadMetrics, LocMetrics};
7use crate::parser::Language;
8use std::collections::HashSet;
9use tree_sitter::Node;
10
11/// Calculates complexity metrics from AST nodes
12pub struct ComplexityCalculator {
13    /// Source code being analyzed
14    source: String,
15}
16
17impl ComplexityCalculator {
18    /// Create a new calculator with the given source code
19    pub fn new(source: impl Into<String>) -> Self {
20        Self { source: source.into() }
21    }
22
23    /// Get text for a node
24    fn node_text(&self, node: &Node<'_>) -> &str {
25        node.utf8_text(self.source.as_bytes()).unwrap_or("")
26    }
27
28    /// Calculate all complexity metrics for a function node
29    pub fn calculate(&self, node: &Node<'_>, language: Language) -> ComplexityMetrics {
30        let cyclomatic = self.cyclomatic_complexity(node, language);
31        let cognitive = self.cognitive_complexity(node, language);
32        let halstead = self.halstead_metrics(node, language);
33        let loc = self.loc_metrics(node);
34        let max_nesting_depth = self.max_nesting_depth(node, language);
35        let parameter_count = self.parameter_count(node, language);
36        let return_count = self.return_count(node, language);
37
38        // Calculate maintainability index (MI)
39        // Formula: MI = 171 - 5.2 * ln(V) - 0.23 * CC - 16.2 * ln(LOC)
40        // Where V = Halstead Volume, CC = Cyclomatic Complexity, LOC = Lines of Code
41        let maintainability_index = halstead.as_ref().map(|h| {
42            let v = h.volume.max(1.0);
43            let cc = cyclomatic as f32;
44            let loc = loc.source.max(1) as f32;
45
46            let mi = 171.0 - 5.2 * v.ln() - 0.23 * cc - 16.2 * loc.ln();
47            // Normalize to 0-100 scale
48            (mi.max(0.0) * 100.0 / 171.0).min(100.0)
49        });
50
51        ComplexityMetrics {
52            cyclomatic,
53            cognitive,
54            halstead,
55            loc,
56            maintainability_index,
57            max_nesting_depth,
58            parameter_count,
59            return_count,
60        }
61    }
62
63    /// Calculate cyclomatic complexity (McCabe's complexity)
64    ///
65    /// CC = E - N + 2P (for a single function, P=1)
66    /// Simplified: CC = 1 + number of decision points
67    ///
68    /// Decision points: if, else if, while, for, case, catch, &&, ||, ?:
69    pub fn cyclomatic_complexity(&self, node: &Node<'_>, language: Language) -> u32 {
70        let mut complexity = 1; // Base complexity
71
72        self.walk_tree(node, &mut |child| {
73            if self.is_decision_point(child, language) {
74                complexity += 1;
75            }
76        });
77
78        complexity
79    }
80
81    /// Check if a node is a decision point (contributes to cyclomatic complexity)
82    fn is_decision_point(&self, node: &Node<'_>, language: Language) -> bool {
83        let kind = node.kind();
84
85        // Language-agnostic decision points
86        let common_decisions = [
87            "if_statement",
88            "if_expression",
89            "if",
90            "else_if",
91            "elif",
92            "elsif",
93            "while_statement",
94            "while_expression",
95            "while",
96            "for_statement",
97            "for_expression",
98            "for",
99            "for_in_statement",
100            "foreach",
101            "case",
102            "when",
103            "match_arm",
104            "catch_clause",
105            "except_clause",
106            "rescue",
107            "conditional_expression", // ternary
108            "ternary_expression",
109            "binary_expression",
110            "logical_and",
111            "logical_or",
112        ];
113
114        if common_decisions.contains(&kind) {
115            return true;
116        }
117
118        // Check for && and || operators in binary expressions
119        if kind == "binary_expression" || kind == "binary_operator" {
120            let text = self.node_text(node);
121            if text.contains("&&")
122                || text.contains("||")
123                || text.contains(" and ")
124                || text.contains(" or ")
125            {
126                return true;
127            }
128        }
129
130        // Language-specific decision points
131        match language {
132            Language::Rust => {
133                matches!(kind, "match_expression" | "if_let_expression" | "while_let_expression")
134            },
135            Language::Go => matches!(kind, "select_statement" | "type_switch_statement"),
136            Language::Swift => matches!(kind, "guard_statement" | "switch_statement"),
137            Language::Kotlin => matches!(kind, "when_expression"),
138            Language::Haskell => matches!(kind, "case_expression" | "guard"),
139            Language::Elixir => matches!(kind, "case" | "cond" | "with"),
140            Language::Clojure => matches!(kind, "cond" | "case"),
141            Language::OCaml => matches!(kind, "match_expression"),
142            _ => false,
143        }
144    }
145
146    /// Calculate cognitive complexity
147    ///
148    /// Cognitive complexity measures how hard code is to understand.
149    /// It penalizes nesting, breaks in linear flow, and complex control structures.
150    pub fn cognitive_complexity(&self, node: &Node<'_>, language: Language) -> u32 {
151        let mut complexity = 0;
152        self.cognitive_walk(node, language, 0, &mut complexity);
153        complexity
154    }
155
156    fn cognitive_walk(
157        &self,
158        node: &Node<'_>,
159        language: Language,
160        nesting: u32,
161        complexity: &mut u32,
162    ) {
163        let kind = node.kind();
164
165        // Increment for control flow structures
166        let is_control_flow = self.is_control_flow(kind, language);
167        if is_control_flow {
168            // Base increment
169            *complexity += 1;
170            // Nesting increment
171            *complexity += nesting;
172        }
173
174        // Increment for breaks in linear flow
175        if self.is_flow_break(kind, language) {
176            *complexity += 1;
177        }
178
179        // Recursion penalty
180        if self.is_recursion(node, language) {
181            *complexity += 1;
182        }
183
184        // Walk children with updated nesting
185        let new_nesting = if is_control_flow || self.is_nesting_structure(kind, language) {
186            nesting + 1
187        } else {
188            nesting
189        };
190
191        let mut cursor = node.walk();
192        for child in node.children(&mut cursor) {
193            self.cognitive_walk(&child, language, new_nesting, complexity);
194        }
195    }
196
197    fn is_control_flow(&self, kind: &str, language: Language) -> bool {
198        let common_control = [
199            "if_statement",
200            "if_expression",
201            "while_statement",
202            "while_expression",
203            "for_statement",
204            "for_expression",
205            "for_in_statement",
206            "switch_statement",
207            "match_expression",
208            "try_statement",
209        ];
210
211        if common_control.contains(&kind) {
212            return true;
213        }
214
215        match language {
216            Language::Rust => matches!(kind, "if_let_expression" | "while_let_expression"),
217            Language::Go => matches!(kind, "select_statement"),
218            Language::Swift => matches!(kind, "guard_statement"),
219            _ => false,
220        }
221    }
222
223    fn is_flow_break(&self, kind: &str, _language: Language) -> bool {
224        matches!(
225            kind,
226            "break_statement"
227                | "continue_statement"
228                | "goto_statement"
229                | "return_statement"
230                | "throw_statement"
231                | "raise"
232        )
233    }
234
235    fn is_nesting_structure(&self, kind: &str, _language: Language) -> bool {
236        matches!(
237            kind,
238            "lambda_expression"
239                | "anonymous_function"
240                | "closure_expression"
241                | "block"
242                | "arrow_function"
243                | "function_expression"
244        )
245    }
246
247    fn is_recursion(&self, node: &Node<'_>, _language: Language) -> bool {
248        // Check if this node is a function call to the current function
249        // This is a simplified check - full recursion detection would need function context
250        if node.kind() == "call_expression" || node.kind() == "function_call" {
251            // Would need to compare called function name with enclosing function name
252            // For now, return false - this would need more context
253        }
254        false
255    }
256
257    /// Calculate Halstead complexity metrics
258    pub fn halstead_metrics(&self, node: &Node<'_>, language: Language) -> Option<HalsteadMetrics> {
259        let mut operators = HashSet::new();
260        let mut operands = HashSet::new();
261        let mut total_operators = 0u32;
262        let mut total_operands = 0u32;
263
264        self.walk_tree(node, &mut |child| {
265            let kind = child.kind();
266            let text = self.node_text(child);
267
268            if self.is_operator(kind, language) {
269                operators.insert(text.to_owned());
270                total_operators += 1;
271            } else if self.is_operand(kind, language) {
272                operands.insert(text.to_owned());
273                total_operands += 1;
274            }
275        });
276
277        let n1 = operators.len() as u32; // distinct operators
278        let n2 = operands.len() as u32; // distinct operands
279        let nn1 = total_operators; // total operators
280        let nn2 = total_operands; // total operands
281
282        if n1 == 0 || n2 == 0 {
283            return None;
284        }
285
286        let vocabulary = n1 + n2;
287        let length = nn1 + nn2;
288
289        // Calculated length: n1 * log2(n1) + n2 * log2(n2)
290        let calculated_length = (n1 as f32) * (n1 as f32).log2() + (n2 as f32) * (n2 as f32).log2();
291
292        // Volume: N * log2(n)
293        let volume = (length as f32) * (vocabulary as f32).log2();
294
295        // Difficulty: (n1/2) * (N2/n2)
296        let difficulty = ((n1 as f32) / 2.0) * ((nn2 as f32) / (n2 as f32).max(1.0));
297
298        // Effort: D * V
299        let effort = difficulty * volume;
300
301        // Time to program: E / 18 (seconds)
302        let time = effort / 18.0;
303
304        // Estimated bugs: V / 3000
305        let bugs = volume / 3000.0;
306
307        Some(HalsteadMetrics {
308            distinct_operators: n1,
309            distinct_operands: n2,
310            total_operators: nn1,
311            total_operands: nn2,
312            vocabulary,
313            length,
314            calculated_length,
315            volume,
316            difficulty,
317            effort,
318            time,
319            bugs,
320        })
321    }
322
323    fn is_operator(&self, kind: &str, _language: Language) -> bool {
324        matches!(
325            kind,
326            "binary_operator"
327                | "unary_operator"
328                | "assignment_operator"
329                | "comparison_operator"
330                | "arithmetic_operator"
331                | "logical_operator"
332                | "bitwise_operator"
333                | "+"
334                | "-"
335                | "*"
336                | "/"
337                | "%"
338                | "="
339                | "=="
340                | "!="
341                | "<"
342                | ">"
343                | "<="
344                | ">="
345                | "&&"
346                | "||"
347                | "!"
348                | "&"
349                | "|"
350                | "^"
351                | "~"
352                | "<<"
353                | ">>"
354                | "+="
355                | "-="
356                | "*="
357                | "/="
358                | "."
359                | "->"
360                | "::"
361                | "?"
362                | ":"
363        )
364    }
365
366    fn is_operand(&self, kind: &str, _language: Language) -> bool {
367        matches!(
368            kind,
369            "identifier"
370                | "number"
371                | "integer"
372                | "float"
373                | "string"
374                | "string_literal"
375                | "number_literal"
376                | "integer_literal"
377                | "float_literal"
378                | "boolean"
379                | "true"
380                | "false"
381                | "nil"
382                | "null"
383                | "none"
384        )
385    }
386
387    /// Calculate lines of code metrics
388    pub fn loc_metrics(&self, node: &Node<'_>) -> LocMetrics {
389        let text = self.node_text(node);
390        let lines: Vec<&str> = text.lines().collect();
391
392        let mut source = 0u32;
393        let mut comments = 0u32;
394        let mut blank = 0u32;
395
396        for line in &lines {
397            let trimmed = line.trim();
398            if trimmed.is_empty() {
399                blank += 1;
400            } else if self.is_comment_line(trimmed) {
401                comments += 1;
402            } else {
403                source += 1;
404            }
405        }
406
407        LocMetrics { total: lines.len() as u32, source, comments, blank }
408    }
409
410    fn is_comment_line(&self, line: &str) -> bool {
411        line.starts_with("//")
412            || line.starts_with('#')
413            || line.starts_with("/*")
414            || line.starts_with('*')
415            || line.starts_with("*/")
416            || line.starts_with("--")
417            || line.starts_with(";;")
418            || line.starts_with("\"\"\"")
419            || line.starts_with("'''")
420    }
421
422    /// Calculate maximum nesting depth
423    pub fn max_nesting_depth(&self, node: &Node<'_>, language: Language) -> u32 {
424        let mut max_depth = 0;
425        self.nesting_walk(node, language, 0, &mut max_depth);
426        max_depth
427    }
428
429    fn nesting_walk(&self, node: &Node<'_>, language: Language, depth: u32, max_depth: &mut u32) {
430        let kind = node.kind();
431
432        let is_nesting =
433            self.is_control_flow(kind, language) || self.is_nesting_structure(kind, language);
434
435        let new_depth = if is_nesting { depth + 1 } else { depth };
436
437        if new_depth > *max_depth {
438            *max_depth = new_depth;
439        }
440
441        let mut cursor = node.walk();
442        for child in node.children(&mut cursor) {
443            self.nesting_walk(&child, language, new_depth, max_depth);
444        }
445    }
446
447    /// Count number of parameters
448    pub fn parameter_count(&self, node: &Node<'_>, _language: Language) -> u32 {
449        let mut count = 0;
450
451        // Find parameters node
452        if let Some(params) = node.child_by_field_name("parameters") {
453            let mut cursor = params.walk();
454            for child in params.children(&mut cursor) {
455                let kind = child.kind();
456                if kind.contains("parameter")
457                    || kind == "identifier"
458                    || kind == "typed_parameter"
459                    || kind == "formal_parameter"
460                {
461                    count += 1;
462                }
463            }
464        }
465
466        count
467    }
468
469    /// Count number of return statements
470    pub fn return_count(&self, node: &Node<'_>, _language: Language) -> u32 {
471        let mut count = 0;
472
473        self.walk_tree(node, &mut |child| {
474            if child.kind() == "return_statement" || child.kind() == "return" {
475                count += 1;
476            }
477        });
478
479        // If no explicit return but function has expression body, count as 1
480        if count == 0 {
481            count = 1;
482        }
483
484        count
485    }
486
487    /// Walk tree and apply callback to each node
488    fn walk_tree<F>(&self, node: &Node<'_>, callback: &mut F)
489    where
490        F: FnMut(&Node<'_>),
491    {
492        callback(node);
493
494        let mut cursor = node.walk();
495        for child in node.children(&mut cursor) {
496            self.walk_tree(&child, callback);
497        }
498    }
499}
500
501/// Calculate complexity for a function given its source code
502pub fn calculate_complexity(
503    source: &str,
504    node: &Node<'_>,
505    language: Language,
506) -> ComplexityMetrics {
507    let calculator = ComplexityCalculator::new(source);
508    calculator.calculate(node, language)
509}
510
511/// Calculate complexity for source code without needing a tree-sitter node
512///
513/// This is a convenience function that handles the parsing internally.
514/// Returns an error if the source cannot be parsed.
515pub fn calculate_complexity_from_source(
516    source: &str,
517    language: Language,
518) -> Result<ComplexityMetrics, String> {
519    // Get tree-sitter language for parsing
520    let ts_language = match language {
521        Language::Python => tree_sitter_python::LANGUAGE.into(),
522        Language::JavaScript => tree_sitter_javascript::LANGUAGE.into(),
523        Language::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
524        Language::Rust => tree_sitter_rust::LANGUAGE.into(),
525        Language::Go => tree_sitter_go::LANGUAGE.into(),
526        Language::Java => tree_sitter_java::LANGUAGE.into(),
527        Language::C => tree_sitter_c::LANGUAGE.into(),
528        Language::Cpp => tree_sitter_cpp::LANGUAGE.into(),
529        Language::CSharp => tree_sitter_c_sharp::LANGUAGE.into(),
530        Language::Ruby => tree_sitter_ruby::LANGUAGE.into(),
531        Language::Php => tree_sitter_php::LANGUAGE_PHP.into(),
532        Language::Swift => tree_sitter_swift::LANGUAGE.into(),
533        Language::Kotlin => tree_sitter_kotlin_ng::LANGUAGE.into(),
534        Language::Scala => tree_sitter_scala::LANGUAGE.into(),
535        Language::Haskell => tree_sitter_haskell::LANGUAGE.into(),
536        Language::Elixir => tree_sitter_elixir::LANGUAGE.into(),
537        Language::Clojure => tree_sitter_clojure::LANGUAGE.into(),
538        Language::OCaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(),
539        Language::Lua => tree_sitter_lua::LANGUAGE.into(),
540        Language::R => tree_sitter_r::LANGUAGE.into(),
541        Language::Bash => tree_sitter_bash::LANGUAGE.into(),
542        // FSharp doesn't have tree-sitter support yet
543        Language::FSharp => {
544            return Err(
545                "F# complexity analysis not yet supported (no tree-sitter parser available)"
546                    .to_owned(),
547            )
548        },
549    };
550
551    let mut ts_parser = tree_sitter::Parser::new();
552    ts_parser
553        .set_language(&ts_language)
554        .map_err(|e| format!("Failed to set language: {}", e))?;
555
556    let tree = ts_parser
557        .parse(source, None)
558        .ok_or_else(|| "Failed to parse source code".to_owned())?;
559
560    let calculator = ComplexityCalculator::new(source);
561    Ok(calculator.calculate(&tree.root_node(), language))
562}
563
564/// Thresholds for complexity warnings
565#[derive(Debug, Clone, Copy)]
566pub struct ComplexityThresholds {
567    /// Cyclomatic complexity warning threshold
568    pub cyclomatic_warn: u32,
569    /// Cyclomatic complexity error threshold
570    pub cyclomatic_error: u32,
571    /// Cognitive complexity warning threshold
572    pub cognitive_warn: u32,
573    /// Cognitive complexity error threshold
574    pub cognitive_error: u32,
575    /// Max nesting depth warning threshold
576    pub nesting_warn: u32,
577    /// Max nesting depth error threshold
578    pub nesting_error: u32,
579    /// Max parameter count warning threshold
580    pub params_warn: u32,
581    /// Max parameter count error threshold
582    pub params_error: u32,
583    /// Maintainability index warning threshold (below this)
584    pub maintainability_warn: f32,
585    /// Maintainability index error threshold (below this)
586    pub maintainability_error: f32,
587}
588
589impl Default for ComplexityThresholds {
590    fn default() -> Self {
591        Self {
592            cyclomatic_warn: 10,
593            cyclomatic_error: 20,
594            cognitive_warn: 15,
595            cognitive_error: 30,
596            nesting_warn: 4,
597            nesting_error: 6,
598            params_warn: 5,
599            params_error: 8,
600            maintainability_warn: 40.0,
601            maintainability_error: 20.0,
602        }
603    }
604}
605
606/// Severity of a complexity issue
607#[derive(Debug, Clone, Copy, PartialEq, Eq)]
608pub enum ComplexitySeverity {
609    Ok,
610    Warning,
611    Error,
612}
613
614/// Check complexity metrics against thresholds
615pub fn check_complexity(
616    metrics: &ComplexityMetrics,
617    thresholds: &ComplexityThresholds,
618) -> Vec<(String, ComplexitySeverity)> {
619    let mut issues = Vec::new();
620
621    // Cyclomatic complexity
622    if metrics.cyclomatic >= thresholds.cyclomatic_error {
623        issues.push((
624            format!(
625                "Cyclomatic complexity {} exceeds error threshold {}",
626                metrics.cyclomatic, thresholds.cyclomatic_error
627            ),
628            ComplexitySeverity::Error,
629        ));
630    } else if metrics.cyclomatic >= thresholds.cyclomatic_warn {
631        issues.push((
632            format!(
633                "Cyclomatic complexity {} exceeds warning threshold {}",
634                metrics.cyclomatic, thresholds.cyclomatic_warn
635            ),
636            ComplexitySeverity::Warning,
637        ));
638    }
639
640    // Cognitive complexity
641    if metrics.cognitive >= thresholds.cognitive_error {
642        issues.push((
643            format!(
644                "Cognitive complexity {} exceeds error threshold {}",
645                metrics.cognitive, thresholds.cognitive_error
646            ),
647            ComplexitySeverity::Error,
648        ));
649    } else if metrics.cognitive >= thresholds.cognitive_warn {
650        issues.push((
651            format!(
652                "Cognitive complexity {} exceeds warning threshold {}",
653                metrics.cognitive, thresholds.cognitive_warn
654            ),
655            ComplexitySeverity::Warning,
656        ));
657    }
658
659    // Nesting depth
660    if metrics.max_nesting_depth >= thresholds.nesting_error {
661        issues.push((
662            format!(
663                "Nesting depth {} exceeds error threshold {}",
664                metrics.max_nesting_depth, thresholds.nesting_error
665            ),
666            ComplexitySeverity::Error,
667        ));
668    } else if metrics.max_nesting_depth >= thresholds.nesting_warn {
669        issues.push((
670            format!(
671                "Nesting depth {} exceeds warning threshold {}",
672                metrics.max_nesting_depth, thresholds.nesting_warn
673            ),
674            ComplexitySeverity::Warning,
675        ));
676    }
677
678    // Parameter count
679    if metrics.parameter_count >= thresholds.params_error {
680        issues.push((
681            format!(
682                "Parameter count {} exceeds error threshold {}",
683                metrics.parameter_count, thresholds.params_error
684            ),
685            ComplexitySeverity::Error,
686        ));
687    } else if metrics.parameter_count >= thresholds.params_warn {
688        issues.push((
689            format!(
690                "Parameter count {} exceeds warning threshold {}",
691                metrics.parameter_count, thresholds.params_warn
692            ),
693            ComplexitySeverity::Warning,
694        ));
695    }
696
697    // Maintainability index
698    if let Some(mi) = metrics.maintainability_index {
699        if mi <= thresholds.maintainability_error {
700            issues.push((
701                format!(
702                    "Maintainability index {:.1} below error threshold {}",
703                    mi, thresholds.maintainability_error
704                ),
705                ComplexitySeverity::Error,
706            ));
707        } else if mi <= thresholds.maintainability_warn {
708            issues.push((
709                format!(
710                    "Maintainability index {:.1} below warning threshold {}",
711                    mi, thresholds.maintainability_warn
712                ),
713                ComplexitySeverity::Warning,
714            ));
715        }
716    }
717
718    issues
719}
720
721#[cfg(test)]
722mod tests {
723    use super::*;
724
725    #[test]
726    fn test_loc_metrics() {
727        let source = r#"
728fn example() {
729    // Comment
730    let x = 1;
731
732    /* Multi-line
733     * comment */
734    let y = 2;
735}
736"#;
737        let calculator = ComplexityCalculator::new(source);
738        // Note: This would need actual tree-sitter node for proper test
739        // For now, test the helper
740        assert!(calculator.is_comment_line("// Comment"));
741        assert!(calculator.is_comment_line("/* Multi-line"));
742        assert!(!calculator.is_comment_line("let x = 1;"));
743    }
744
745    #[test]
746    fn test_thresholds_default() {
747        let thresholds = ComplexityThresholds::default();
748        assert_eq!(thresholds.cyclomatic_warn, 10);
749        assert_eq!(thresholds.cognitive_warn, 15);
750    }
751
752    #[test]
753    fn test_check_complexity() {
754        let metrics = ComplexityMetrics {
755            cyclomatic: 25,
756            cognitive: 35,
757            max_nesting_depth: 7,
758            parameter_count: 10,
759            maintainability_index: Some(15.0),
760            ..Default::default()
761        };
762
763        let thresholds = ComplexityThresholds::default();
764        let issues = check_complexity(&metrics, &thresholds);
765
766        // Should have errors for all metrics
767        assert!(issues.len() >= 4);
768        assert!(issues
769            .iter()
770            .any(|(msg, sev)| { msg.contains("Cyclomatic") && *sev == ComplexitySeverity::Error }));
771    }
772}