Skip to main content

tldr_cli/commands/patterns/
cohesion.rs

1//! Cohesion command - LCOM4 (Lack of Cohesion of Methods) analysis for Python classes.
2//!
3//! LCOM4 measures class cohesion by counting connected components in the method-field graph:
4//! - LCOM4 = 1: All methods are connected (cohesive class)
5//! - LCOM4 > 1: Methods form disconnected groups (split candidate)
6//!
7//! # Algorithm
8//!
9//! 1. Parse class, extract methods and field accesses (`self.x`)
10//! 2. Build bipartite graph: methods <-> fields they access
11//! 3. Add edges for intra-class method calls (`self.method()`)
12//! 4. Count connected components via union-find with path compression
13//!
14//! # TIGER Mitigations
15//!
16//! - **T06**: Union-find with path compression AND union by rank
17//! - **E01**: `--timeout` flag (default 30s)
18//! - **E04**: `MAX_METHODS_PER_CLASS` and `MAX_FIELDS_PER_CLASS` limits
19//! - **E05**: `MAX_ITERATIONS` for union-find operations
20//!
21//! # Example
22//!
23//! ```bash
24//! tldr cohesion src/models.py
25//! tldr cohesion src/models.py --min-methods 3 --include-dunder
26//! tldr cohesion src/ --format text
27//! ```
28
29use std::collections::{HashMap, HashSet};
30use std::path::{Path, PathBuf};
31use std::time::{Duration, Instant};
32
33use anyhow::Result;
34use clap::{Args, ValueEnum};
35use colored::Colorize;
36use serde::{Deserialize, Serialize};
37use tldr_core::walker::walk_project;
38use tree_sitter::{Node, Parser, Tree};
39use tree_sitter_python::LANGUAGE as PYTHON_LANGUAGE;
40
41use tldr_core::quality::cohesion as core_cohesion;
42use tldr_core::types::Language;
43
44use crate::output::{common_path_prefix, strip_prefix_display, OutputFormat as GlobalOutputFormat};
45
46use super::error::{PatternsError, PatternsResult};
47use super::types::{
48    ClassCohesion, CohesionReport, CohesionSummary, CohesionVerdict, ComponentInfo,
49};
50use super::validation::{
51    read_file_safe, validate_directory_path, validate_file_path, validate_file_path_in_project,
52    MAX_CLASSES_PER_FILE, MAX_DIRECTORY_FILES, MAX_FIELDS_PER_CLASS, MAX_METHODS_PER_CLASS,
53};
54
55// =============================================================================
56// Constants (TIGER/ELEPHANT Mitigations)
57// =============================================================================
58
59/// Maximum union-find iterations to prevent infinite loops (E05)
60const MAX_UNION_FIND_ITERATIONS: usize = 10_000;
61
62/// Default timeout in seconds (E01)
63const DEFAULT_TIMEOUT_SECS: u64 = 30;
64
65// =============================================================================
66// Output Format
67// =============================================================================
68
69/// Output format for cohesion command
70#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, ValueEnum, Serialize, Deserialize)]
71#[serde(rename_all = "lowercase")]
72pub enum OutputFormat {
73    /// JSON output (default)
74    #[default]
75    Json,
76    /// Human-readable text output
77    Text,
78}
79
80// =============================================================================
81// CLI Arguments
82// =============================================================================
83
84/// Compute LCOM4 (Lack of Cohesion of Methods) metric for Python classes.
85///
86/// LCOM4 measures class cohesion by counting connected components in the
87/// method-field bipartite graph. A cohesive class has LCOM4 = 1, while
88/// a class with LCOM4 > 1 is a candidate for splitting.
89///
90/// # Example
91///
92/// ```bash
93/// tldr cohesion src/models.py
94/// tldr cohesion src/models.py --min-methods 3
95/// tldr cohesion src/ --format text
96/// ```
97#[derive(Debug, Args)]
98pub struct CohesionArgs {
99    /// File or directory to analyze
100    pub path: PathBuf,
101
102    /// Minimum number of instance methods for a class to be included in analysis.
103    /// Classes with fewer methods are filtered from results. For Rust and Go,
104    /// only instance methods (with self/receiver) are counted, not associated
105    /// functions like new() or default().
106    #[arg(long, default_value = "1")]
107    pub min_methods: u32,
108
109    /// Include dunder methods (__init__, __str__, etc.) in analysis
110    #[arg(long)]
111    pub include_dunder: bool,
112
113    /// Output format (json or text). Prefer global --format/-f flag.
114    #[arg(
115        long = "output-format",
116        alias = "output",
117        short = 'o',
118        hide = true,
119        value_enum,
120        default_value = "json"
121    )]
122    pub output_format: OutputFormat,
123
124    /// Analysis timeout in seconds
125    #[arg(long, default_value_t = DEFAULT_TIMEOUT_SECS)]
126    pub timeout: u64,
127
128    /// Project root for path validation (optional)
129    #[arg(long)]
130    pub project_root: Option<PathBuf>,
131
132    /// Language filter (auto-detected if omitted)
133    #[arg(long, short = 'l')]
134    pub lang: Option<Language>,
135}
136
137impl CohesionArgs {
138    /// Run the cohesion analysis command
139    pub fn run(&self, global_format: GlobalOutputFormat) -> Result<()> {
140        let start = Instant::now();
141        let timeout = Duration::from_secs(self.timeout);
142
143        // Validate path
144        let canonical_path = if let Some(ref root) = self.project_root {
145            validate_file_path_in_project(&self.path, root)?
146        } else {
147            validate_file_path(&self.path)?
148        };
149
150        // Analyze based on path type
151        let mut report = if canonical_path.is_dir() {
152            analyze_directory(&canonical_path, self, start, timeout)?
153        } else {
154            analyze_single_file(&canonical_path, self)?
155        };
156
157        // (path-and-schema-cleanup-v3 P3.BUG-N2) When the user supplied
158        // a single file path, echo it verbatim in each class's
159        // `file_path`. The canonical path was used for the read above,
160        // but downstream consumers expect the JSON to mirror the input
161        // (no `/tmp/...` -> `/private/tmp/...` rewrite on macOS).
162        // Directory mode skips this — each file there is the resolved
163        // path of a walker entry, not user-supplied.
164        if !canonical_path.is_dir() {
165            let user_path_str = self.path.display().to_string();
166            for class in &mut report.classes {
167                class.file_path = user_path_str.clone();
168            }
169        }
170
171        // Resolve format: global -f flag takes priority over hidden --output-format
172        let use_text = matches!(global_format, GlobalOutputFormat::Text)
173            || matches!(self.output_format, OutputFormat::Text);
174
175        // Output based on format
176        if use_text {
177            let text = format_cohesion_text(&report);
178            println!("{}", text);
179        } else {
180            let json = serde_json::to_string_pretty(&report)?;
181            println!("{}", json);
182        }
183
184        Ok(())
185    }
186}
187
188// =============================================================================
189// Union-Find with Path Compression (TIGER-06)
190// =============================================================================
191
192/// Union-Find (Disjoint Set Union) data structure with path compression and union by rank.
193///
194/// This implementation uses both optimizations to achieve near-O(1) amortized time per operation:
195/// - **Path compression**: Flatten tree during `find` operations
196/// - **Union by rank**: Attach smaller tree under root of larger tree
197///
198/// # TIGER-06 Mitigation
199///
200/// Path compression prevents worst-case O(n) find operations.
201/// Union by rank keeps trees balanced.
202#[derive(Debug, Clone)]
203pub struct UnionFind {
204    /// Parent pointers (index -> parent index)
205    parent: Vec<usize>,
206    /// Rank for union by rank optimization
207    rank: Vec<usize>,
208    /// Iteration counter to prevent infinite loops (E05)
209    iterations: usize,
210    /// Maximum allowed iterations
211    max_iterations: usize,
212}
213
214impl UnionFind {
215    /// Create a new union-find structure with n elements
216    pub fn new(n: usize) -> Self {
217        Self {
218            parent: (0..n).collect(),
219            rank: vec![0; n],
220            iterations: 0,
221            max_iterations: MAX_UNION_FIND_ITERATIONS,
222        }
223    }
224
225    /// Find the root of the set containing x, with path compression.
226    ///
227    /// Returns None if max iterations exceeded.
228    pub fn find(&mut self, x: usize) -> Option<usize> {
229        if x >= self.parent.len() {
230            return None;
231        }
232
233        // Find root
234        let mut root = x;
235        while self.parent[root] != root {
236            self.iterations += 1;
237            if self.iterations > self.max_iterations {
238                return None; // Exceeded iteration limit
239            }
240            root = self.parent[root];
241        }
242
243        // Path compression: point all nodes on path directly to root
244        let mut current = x;
245        while self.parent[current] != root {
246            self.iterations += 1;
247            if self.iterations > self.max_iterations {
248                return None;
249            }
250            let next = self.parent[current];
251            self.parent[current] = root;
252            current = next;
253        }
254
255        Some(root)
256    }
257
258    /// Union the sets containing x and y, using union by rank.
259    ///
260    /// Returns true if a union was performed, false if already in same set or error.
261    pub fn union(&mut self, x: usize, y: usize) -> bool {
262        let root_x = match self.find(x) {
263            Some(r) => r,
264            None => return false,
265        };
266        let root_y = match self.find(y) {
267            Some(r) => r,
268            None => return false,
269        };
270
271        if root_x == root_y {
272            return false; // Already in same set
273        }
274
275        // Union by rank: attach smaller tree under root of larger tree
276        match self.rank[root_x].cmp(&self.rank[root_y]) {
277            std::cmp::Ordering::Less => {
278                self.parent[root_x] = root_y;
279            }
280            std::cmp::Ordering::Greater => {
281                self.parent[root_y] = root_x;
282            }
283            std::cmp::Ordering::Equal => {
284                self.parent[root_y] = root_x;
285                self.rank[root_x] += 1;
286            }
287        }
288
289        true
290    }
291
292    /// Count the number of unique connected components.
293    ///
294    /// Only counts components for the first `method_count` elements (ignoring fields
295    /// that are only connected to a single method).
296    pub fn count_components(&mut self, method_count: usize) -> usize {
297        let mut roots = HashSet::new();
298        for i in 0..method_count.min(self.parent.len()) {
299            if let Some(root) = self.find(i) {
300                roots.insert(root);
301            }
302        }
303        roots.len()
304    }
305
306    /// Get components as groups of indices.
307    pub fn get_components(&mut self) -> HashMap<usize, Vec<usize>> {
308        let mut components: HashMap<usize, Vec<usize>> = HashMap::new();
309        for i in 0..self.parent.len() {
310            if let Some(root) = self.find(i) {
311                components.entry(root).or_default().push(i);
312            }
313        }
314        components
315    }
316
317    /// Check if iteration limit was exceeded
318    pub fn limit_exceeded(&self) -> bool {
319        self.iterations > self.max_iterations
320    }
321}
322
323// =============================================================================
324// Method Analysis
325// =============================================================================
326
327/// Analysis result for a single method
328#[derive(Debug, Clone)]
329struct MethodAnalysis {
330    /// Method name
331    name: String,
332    /// Fields accessed by this method (self.x)
333    field_accesses: Vec<String>,
334    /// Other methods called (self.method())
335    method_calls: Vec<String>,
336}
337
338// =============================================================================
339// Core Analysis Functions
340// =============================================================================
341
342/// Analyze a single file for class cohesion.
343///
344/// For Python files, uses the CLI's own Python-specific implementation.
345/// For all other supported languages (Java, TypeScript, Go, Rust, etc.),
346/// delegates to the core library's multi-language analyzer.
347fn analyze_single_file(path: &Path, args: &CohesionArgs) -> PatternsResult<CohesionReport> {
348    let lang = Language::from_path(path);
349
350    // For non-Python languages, delegate to the core multi-language analyzer
351    if lang != Some(Language::Python) && lang.is_some() {
352        return analyze_single_file_core(path, args);
353    }
354
355    // Python: use the CLI's existing Python-specific implementation
356    let source = read_file_safe(path)?;
357    let tree = parse_python(&source, path)?;
358    let classes = analyze_file_ast(&tree, &source, path, args)?;
359
360    let summary = compute_summary(&classes);
361
362    Ok(CohesionReport { classes, summary })
363}
364
365/// Analyze a single non-Python file using the core library.
366fn analyze_single_file_core(path: &Path, args: &CohesionArgs) -> PatternsResult<CohesionReport> {
367    let threshold = 2;
368    let core_report = core_cohesion::analyze_cohesion(path, None, threshold).map_err(|e| {
369        PatternsError::ParseError {
370            file: path.to_path_buf(),
371            message: format!("Core cohesion analysis failed: {}", e),
372        }
373    })?;
374
375    // Convert core types to CLI types
376    let classes: Vec<ClassCohesion> = core_report
377        .classes
378        .into_iter()
379        .filter(|c| c.method_count >= args.min_methods as usize)
380        .map(|c| ClassCohesion {
381            class_name: c.name,
382            file_path: c.file.display().to_string(),
383            line: c.line as u32,
384            lcom4: c.lcom4 as u32,
385            method_count: c.method_count as u32,
386            field_count: c.field_count as u32,
387            verdict: match c.verdict {
388                core_cohesion::CohesionVerdict::Cohesive => CohesionVerdict::Cohesive,
389                core_cohesion::CohesionVerdict::SplitCandidate => CohesionVerdict::SplitCandidate,
390            },
391            split_suggestion: c.split_suggestion,
392            components: c
393                .components
394                .into_iter()
395                .map(|comp| ComponentInfo {
396                    methods: comp.methods,
397                    fields: comp.fields,
398                })
399                .collect(),
400        })
401        .collect();
402
403    let summary = compute_summary(&classes);
404    Ok(CohesionReport { classes, summary })
405}
406
407/// Analyze a directory of source files for class cohesion.
408///
409/// Supports Python, Java, TypeScript, JavaScript, Go, Rust, and other
410/// languages supported by the core library.
411fn analyze_directory(
412    dir: &Path,
413    args: &CohesionArgs,
414    start: Instant,
415    timeout: Duration,
416) -> PatternsResult<CohesionReport> {
417    validate_directory_path(dir)?;
418
419    let mut all_classes = Vec::new();
420    let mut file_count = 0u32;
421
422    for entry in walk_project(dir) {
423        // Check timeout
424        if start.elapsed() > timeout {
425            return Err(PatternsError::Timeout {
426                timeout_secs: args.timeout,
427            });
428        }
429
430        // Check file limit
431        if file_count >= MAX_DIRECTORY_FILES {
432            return Err(PatternsError::TooManyFiles {
433                count: file_count,
434                max_files: MAX_DIRECTORY_FILES,
435            });
436        }
437
438        let path = entry.path();
439
440        // Analyze files with recognized language extensions
441        if path.is_file() && Language::from_path(path).is_some() {
442            file_count += 1;
443
444            // Skip test files unless explicitly included
445            let filename = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
446            if filename.starts_with("test_") || filename.ends_with("_test.py") {
447                continue;
448            }
449
450            // Analyze file, collecting errors but continuing
451            match analyze_single_file(path, args) {
452                Ok(report) => {
453                    all_classes.extend(report.classes);
454                }
455                Err(_) => {
456                    // Skip files with parse errors
457                    continue;
458                }
459            }
460        }
461    }
462
463    let summary = compute_summary(&all_classes);
464
465    Ok(CohesionReport {
466        classes: all_classes,
467        summary,
468    })
469}
470
471/// Parse Python source code with tree-sitter.
472fn parse_python(source: &str, file: &Path) -> PatternsResult<Tree> {
473    let mut parser = Parser::new();
474    parser
475        .set_language(&PYTHON_LANGUAGE.into())
476        .map_err(|e| PatternsError::ParseError {
477            file: file.to_path_buf(),
478            message: format!("Failed to set Python language: {}", e),
479        })?;
480
481    parser
482        .parse(source, None)
483        .ok_or_else(|| PatternsError::ParseError {
484            file: file.to_path_buf(),
485            message: "Parsing returned None".to_string(),
486        })
487}
488
489/// Analyze all classes in a parsed Python file.
490fn analyze_file_ast(
491    tree: &Tree,
492    source: &str,
493    file: &Path,
494    args: &CohesionArgs,
495) -> PatternsResult<Vec<ClassCohesion>> {
496    let root = tree.root_node();
497    let source_bytes = source.as_bytes();
498    let mut results = Vec::new();
499    let mut class_count = 0;
500
501    let mut cursor = root.walk();
502    for child in root.children(&mut cursor) {
503        if child.kind() == "class_definition" {
504            class_count += 1;
505            if class_count > MAX_CLASSES_PER_FILE {
506                break; // Limit exceeded
507            }
508
509            if let Some(cohesion) = analyze_class(child, source_bytes, file, args)? {
510                results.push(cohesion);
511            }
512        }
513    }
514
515    Ok(results)
516}
517
518/// Analyze a single class for LCOM4 cohesion.
519fn analyze_class(
520    class_node: Node,
521    source: &[u8],
522    file: &Path,
523    args: &CohesionArgs,
524) -> PatternsResult<Option<ClassCohesion>> {
525    // Get class name
526    let class_name = class_node
527        .child_by_field_name("name")
528        .map(|n| get_node_text(n, source))
529        .unwrap_or("<unknown>")
530        .to_string();
531
532    let line = class_node.start_position().row as u32 + 1;
533
534    // Get class body
535    let body = match class_node.child_by_field_name("body") {
536        Some(b) => b,
537        None => return Ok(None),
538    };
539
540    // Extract methods
541    let methods = extract_methods(body, source, args.include_dunder)?;
542
543    // Filter by min_methods threshold (use all methods for threshold)
544    let all_methods = extract_methods(body, source, true)?;
545    if all_methods.len() < args.min_methods as usize {
546        return Ok(None);
547    }
548
549    // Check method limit (E04)
550    if methods.len() > MAX_METHODS_PER_CLASS {
551        return Ok(Some(ClassCohesion {
552            class_name,
553            file_path: file.display().to_string(),
554            line,
555            lcom4: 0,
556            method_count: methods.len() as u32,
557            field_count: 0,
558            verdict: CohesionVerdict::Cohesive,
559            split_suggestion: Some("Class exceeds MAX_METHODS_PER_CLASS limit".to_string()),
560            components: vec![],
561        }));
562    }
563
564    // Collect all unique fields
565    let mut all_fields: HashSet<String> = HashSet::new();
566    let method_names: HashSet<&str> = methods.iter().map(|m| m.name.as_str()).collect();
567
568    for method in &methods {
569        for field in &method.field_accesses {
570            // Don't count method names as fields
571            if !method_names.contains(field.as_str()) {
572                all_fields.insert(field.clone());
573            }
574        }
575    }
576
577    // Check field limit (E04)
578    if all_fields.len() > MAX_FIELDS_PER_CLASS {
579        return Ok(Some(ClassCohesion {
580            class_name,
581            file_path: file.display().to_string(),
582            line,
583            lcom4: 0,
584            method_count: methods.len() as u32,
585            field_count: all_fields.len() as u32,
586            verdict: CohesionVerdict::Cohesive,
587            split_suggestion: Some("Class exceeds MAX_FIELDS_PER_CLASS limit".to_string()),
588            components: vec![],
589        }));
590    }
591
592    let fields: Vec<String> = all_fields.into_iter().collect();
593
594    // Compute LCOM4
595    let (lcom4, components) = compute_lcom4(&methods, &fields, &method_names);
596
597    // Determine verdict
598    let verdict = CohesionVerdict::from_lcom4(lcom4);
599
600    // Generate split suggestion if needed
601    let split_suggestion = if lcom4 > 1 {
602        Some(generate_split_suggestion(&class_name, &components))
603    } else {
604        None
605    };
606
607    Ok(Some(ClassCohesion {
608        class_name,
609        file_path: file.display().to_string(),
610        line,
611        lcom4,
612        method_count: methods.len() as u32,
613        field_count: fields.len() as u32,
614        verdict,
615        split_suggestion,
616        components,
617    }))
618}
619
620/// Extract methods from a class body.
621fn extract_methods(
622    body: Node,
623    source: &[u8],
624    include_dunder: bool,
625) -> PatternsResult<Vec<MethodAnalysis>> {
626    let mut methods = Vec::new();
627    let mut cursor = body.walk();
628
629    for child in body.children(&mut cursor) {
630        // Handle both sync and async function definitions
631        if child.kind() == "function_definition" || child.kind() == "async_function_definition" {
632            // Get method name
633            let name = child
634                .child_by_field_name("name")
635                .map(|n| get_node_text(n, source))
636                .unwrap_or("")
637                .to_string();
638
639            // Skip static methods and class methods
640            if is_static_or_classmethod(&child, source) {
641                continue;
642            }
643
644            // Filter dunder methods
645            if !include_dunder && is_dunder(&name) {
646                continue;
647            }
648
649            // Extract field accesses (self.x)
650            let field_accesses = extract_field_accesses(child, source);
651
652            // Extract method calls (self.method())
653            let method_calls = extract_method_calls(child, source);
654
655            methods.push(MethodAnalysis {
656                name,
657                field_accesses,
658                method_calls,
659            });
660        }
661    }
662
663    Ok(methods)
664}
665
666/// Check if a method is a dunder method (__xxx__)
667fn is_dunder(name: &str) -> bool {
668    name.starts_with("__") && name.ends_with("__")
669}
670
671/// Check if a method is decorated with @staticmethod or @classmethod
672fn is_static_or_classmethod(node: &Node, source: &[u8]) -> bool {
673    let mut cursor = node.walk();
674    for child in node.children(&mut cursor) {
675        if child.kind() == "decorator" {
676            let text = get_node_text(child, source);
677            if text.contains("staticmethod") || text.contains("classmethod") {
678                return true;
679            }
680        }
681    }
682    false
683}
684
685/// Extract field accesses (self.x) from a method.
686fn extract_field_accesses(method: Node, source: &[u8]) -> Vec<String> {
687    let mut fields = Vec::new();
688    let self_name = get_self_param_name(method, source);
689
690    extract_field_accesses_recursive(method, source, &self_name, &mut fields);
691
692    fields.sort();
693    fields.dedup();
694    fields
695}
696
697fn extract_field_accesses_recursive(
698    node: Node,
699    source: &[u8],
700    self_name: &str,
701    fields: &mut Vec<String>,
702) {
703    // Check if this is a self.x attribute access
704    if node.kind() == "attribute" {
705        if let Some(obj) = node.child_by_field_name("object") {
706            if obj.kind() == "identifier" && get_node_text(obj, source) == self_name {
707                if let Some(attr) = node.child_by_field_name("attribute") {
708                    let attr_name = get_node_text(attr, source);
709                    fields.push(attr_name.to_string());
710                }
711            }
712        }
713    }
714
715    // Recurse into children
716    let mut cursor = node.walk();
717    for child in node.children(&mut cursor) {
718        extract_field_accesses_recursive(child, source, self_name, fields);
719    }
720}
721
722/// Extract method calls (self.method()) from a method.
723fn extract_method_calls(method: Node, source: &[u8]) -> Vec<String> {
724    let mut calls = Vec::new();
725    let self_name = get_self_param_name(method, source);
726
727    extract_method_calls_recursive(method, source, &self_name, &mut calls);
728
729    calls.sort();
730    calls.dedup();
731    calls
732}
733
734fn extract_method_calls_recursive(
735    node: Node,
736    source: &[u8],
737    self_name: &str,
738    calls: &mut Vec<String>,
739) {
740    // Check if this is a self.method() call
741    if node.kind() == "call" {
742        if let Some(func) = node.child_by_field_name("function") {
743            if func.kind() == "attribute" {
744                if let Some(obj) = func.child_by_field_name("object") {
745                    if obj.kind() == "identifier" && get_node_text(obj, source) == self_name {
746                        if let Some(attr) = func.child_by_field_name("attribute") {
747                            let method_name = get_node_text(attr, source);
748                            calls.push(method_name.to_string());
749                        }
750                    }
751                }
752            }
753        }
754    }
755
756    // Recurse into children
757    let mut cursor = node.walk();
758    for child in node.children(&mut cursor) {
759        extract_method_calls_recursive(child, source, self_name, calls);
760    }
761}
762
763/// Get the name of the self parameter (usually "self" but could be different)
764fn get_self_param_name(method: Node, source: &[u8]) -> String {
765    if let Some(params) = method.child_by_field_name("parameters") {
766        let mut cursor = params.walk();
767        for child in params.children(&mut cursor) {
768            if child.kind() == "identifier" {
769                return get_node_text(child, source).to_string();
770            }
771        }
772    }
773    "self".to_string()
774}
775
776/// Compute LCOM4 using union-find.
777///
778/// # Returns
779/// (lcom4_value, connected_components)
780fn compute_lcom4(
781    methods: &[MethodAnalysis],
782    fields: &[String],
783    method_names: &HashSet<&str>,
784) -> (u32, Vec<ComponentInfo>) {
785    if methods.is_empty() {
786        return (0, vec![]);
787    }
788
789    // Create index mappings
790    let method_idx: HashMap<&str, usize> = methods
791        .iter()
792        .enumerate()
793        .map(|(i, m)| (m.name.as_str(), i))
794        .collect();
795
796    let field_idx: HashMap<&str, usize> = fields
797        .iter()
798        .enumerate()
799        .map(|(i, f)| (f.as_str(), methods.len() + i))
800        .collect();
801
802    // Initialize union-find
803    let mut uf = UnionFind::new(methods.len() + fields.len());
804
805    // Connect methods to fields they access
806    for (i, method) in methods.iter().enumerate() {
807        for field in &method.field_accesses {
808            if let Some(&fi) = field_idx.get(field.as_str()) {
809                uf.union(i, fi);
810            }
811        }
812    }
813
814    // Connect methods that call each other
815    for (i, method) in methods.iter().enumerate() {
816        for called in &method.method_calls {
817            if method_names.contains(called.as_str()) {
818                if let Some(&ci) = method_idx.get(called.as_str()) {
819                    uf.union(i, ci);
820                }
821            }
822        }
823    }
824
825    // Check if limit was exceeded
826    if uf.limit_exceeded() {
827        return (
828            0,
829            vec![ComponentInfo {
830                methods: vec!["<analysis incomplete>".to_string()],
831                fields: vec![],
832            }],
833        );
834    }
835
836    // Build component infos
837    let raw_components = uf.get_components();
838    let mut component_infos: Vec<ComponentInfo> = Vec::new();
839
840    for (_, members) in raw_components {
841        let mut ci = ComponentInfo {
842            methods: Vec::new(),
843            fields: Vec::new(),
844        };
845
846        for member_idx in members {
847            if member_idx < methods.len() {
848                ci.methods.push(methods[member_idx].name.clone());
849            } else {
850                let field_pos = member_idx - methods.len();
851                if field_pos < fields.len() {
852                    ci.fields.push(fields[field_pos].clone());
853                }
854            }
855        }
856
857        // Only include components that have at least one method
858        if !ci.methods.is_empty() {
859            ci.methods.sort();
860            ci.fields.sort();
861            component_infos.push(ci);
862        }
863    }
864
865    // Sort components by first method name for deterministic output
866    component_infos.sort_by(|a, b| a.methods.first().cmp(&b.methods.first()));
867
868    let lcom4 = component_infos.len() as u32;
869    (lcom4.max(1), component_infos) // LCOM4 is at least 1 if there are methods
870}
871
872/// Generate a split suggestion for a class with LCOM4 > 1.
873fn generate_split_suggestion(class_name: &str, components: &[ComponentInfo]) -> String {
874    if components.is_empty() {
875        return format!("Consider splitting {} into multiple classes", class_name);
876    }
877
878    let parts: Vec<String> = components
879        .iter()
880        .map(|c| {
881            let methods_str = c.methods.join(", ");
882            format!("[{}]", methods_str)
883        })
884        .collect();
885
886    format!(
887        "Consider splitting {} into {} classes: {}",
888        class_name,
889        components.len(),
890        parts.join(" + ")
891    )
892}
893
894/// Compute summary statistics for a set of class cohesion results.
895fn compute_summary(classes: &[ClassCohesion]) -> CohesionSummary {
896    let total = classes.len() as u32;
897    if total == 0 {
898        return CohesionSummary::default();
899    }
900
901    let cohesive = classes
902        .iter()
903        .filter(|c| c.verdict == CohesionVerdict::Cohesive)
904        .count() as u32;
905
906    let split_candidates = total - cohesive;
907
908    let avg_lcom4 = classes.iter().map(|c| c.lcom4 as f64).sum::<f64>() / total as f64;
909
910    CohesionSummary {
911        total_classes: total,
912        cohesive,
913        split_candidates,
914        avg_lcom4: (avg_lcom4 * 100.0).round() / 100.0, // Round to 2 decimal places
915    }
916}
917
918/// Get text content of a node.
919fn get_node_text<'a>(node: Node<'a>, source: &'a [u8]) -> &'a str {
920    let start = node.start_byte();
921    let end = node.end_byte();
922    if end <= source.len() {
923        std::str::from_utf8(&source[start..end]).unwrap_or("")
924    } else {
925        ""
926    }
927}
928
929// =============================================================================
930// Text Formatting
931// =============================================================================
932
933/// Format a cohesion report as human-readable text.
934///
935/// Shows split candidate classes sorted worst-first (highest LCOM4), with
936/// color-coded severity, path stripping, component details, and split suggestions.
937/// Top 30 entries shown by default with overflow message.
938///
939/// ```text
940/// Cohesion Analysis (LCOM4)
941///
942/// LCOM4  Methods  Fields  Class                         File
943///     4        8       6  UserManager                   models/user.py:42
944///     |-- Component 1: create, update [db, cache]
945///     |-- Component 2: send_email [mailer]
946///     `-- Suggestion: Split into 4 focused classes
947///     3        6       4  OrderProcessor                services/order.py:15
948///     |-- Component 1: process, submit [queue]
949///     `-- Suggestion: Split into 3 focused classes
950///
951/// Summary: 47 classes, 12 split candidates (25.5%), avg LCOM4: 1.82
952/// ```
953pub fn format_cohesion_text(report: &CohesionReport) -> String {
954    let mut output = String::new();
955
956    let s = &report.summary;
957    output.push_str(&format!(
958        "Cohesion Analysis (LCOM4) ({} classes, {} split candidates)\n\n",
959        s.total_classes, s.split_candidates
960    ));
961
962    // Filter to split candidates only (LCOM4 > 1) and sort worst-first
963    let mut candidates: Vec<&ClassCohesion> = report
964        .classes
965        .iter()
966        .filter(|c| c.verdict == CohesionVerdict::SplitCandidate)
967        .collect();
968    candidates.sort_by(|a, b| b.lcom4.cmp(&a.lcom4));
969
970    if candidates.is_empty() {
971        output.push_str("  No split candidates found.\n\n");
972        output.push_str(&format_cohesion_summary(s));
973        return output;
974    }
975
976    // Compute common path prefix for relative display
977    let paths: Vec<&Path> = candidates
978        .iter()
979        .filter_map(|c| Path::new(c.file_path.as_str()).parent())
980        .collect();
981    let prefix = if paths.is_empty() {
982        std::path::PathBuf::new()
983    } else {
984        common_path_prefix(&paths)
985    };
986
987    // Header
988    output.push_str(&format!(
989        " {:>5}  {:>7}  {:>6}  {:<28}  {}\n",
990        "LCOM4", "Methods", "Fields", "Class", "File"
991    ));
992
993    // Show top 30
994    let limit = candidates.len().min(30);
995    for class in candidates.iter().take(limit) {
996        let rel = strip_prefix_display(Path::new(&class.file_path), &prefix);
997        let lcom4_str = format_lcom4_colored(class.lcom4);
998
999        // Truncate class name to 28 chars
1000        let name = if class.class_name.len() > 28 {
1001            format!("{}...", &class.class_name[..25])
1002        } else {
1003            class.class_name.clone()
1004        };
1005
1006        output.push_str(&format!(
1007            " {:>5}  {:>7}  {:>6}  {:<28}  {}:{}\n",
1008            lcom4_str, class.method_count, class.field_count, name, rel, class.line
1009        ));
1010
1011        // Show component details for split candidates
1012        if !class.components.is_empty() {
1013            let comp_count = class.components.len();
1014            for (i, comp) in class.components.iter().enumerate() {
1015                let is_last = i == comp_count - 1 && class.split_suggestion.is_none();
1016                let connector = if is_last { "`--" } else { "|--" };
1017                let methods_str = comp.methods.join(", ");
1018                let fields_str = if comp.fields.is_empty() {
1019                    String::new()
1020                } else {
1021                    format!(" [{}]", comp.fields.join(", "))
1022                };
1023                output.push_str(&format!(
1024                    "     {}  Component {}: {}{}\n",
1025                    connector,
1026                    i + 1,
1027                    methods_str,
1028                    fields_str
1029                ));
1030            }
1031        }
1032
1033        // Show split suggestion
1034        if let Some(ref suggestion) = class.split_suggestion {
1035            output.push_str(&format!("     `--  Suggestion: {}\n", suggestion));
1036        }
1037    }
1038
1039    if candidates.len() > limit {
1040        output.push_str(&format!(
1041            "\n  ... and {} more split candidates\n",
1042            candidates.len() - limit
1043        ));
1044    }
1045
1046    output.push('\n');
1047    output.push_str(&format_cohesion_summary(s));
1048
1049    output
1050}
1051
1052/// Format LCOM4 value with color coding based on severity.
1053fn format_lcom4_colored(lcom4: u32) -> String {
1054    if lcom4 >= 4 {
1055        format!("{}", lcom4).red().bold().to_string()
1056    } else if lcom4 >= 2 {
1057        format!("{}", lcom4).yellow().to_string()
1058    } else {
1059        format!("{}", lcom4).green().to_string()
1060    }
1061}
1062
1063/// Format the cohesion summary line.
1064fn format_cohesion_summary(s: &CohesionSummary) -> String {
1065    let pct = if s.total_classes > 0 {
1066        (s.split_candidates as f64 / s.total_classes as f64) * 100.0
1067    } else {
1068        0.0
1069    };
1070    format!(
1071        "Summary: {} classes, {} split candidates ({:.1}%), avg LCOM4: {:.2}\n",
1072        s.total_classes, s.split_candidates, pct, s.avg_lcom4
1073    )
1074}
1075
1076// =============================================================================
1077// Public Entry Point
1078// =============================================================================
1079
1080/// Run cohesion analysis (for programmatic use).
1081pub fn run(args: CohesionArgs) -> Result<CohesionReport> {
1082    let start = Instant::now();
1083    let timeout = Duration::from_secs(args.timeout);
1084
1085    // Validate path
1086    let canonical_path = if let Some(ref root) = args.project_root {
1087        validate_file_path_in_project(&args.path, root)?
1088    } else {
1089        validate_file_path(&args.path)?
1090    };
1091
1092    // Analyze based on path type
1093    let report = if canonical_path.is_dir() {
1094        analyze_directory(&canonical_path, &args, start, timeout)?
1095    } else {
1096        analyze_single_file(&canonical_path, &args)?
1097    };
1098
1099    Ok(report)
1100}
1101
1102// =============================================================================
1103// Tests
1104// =============================================================================
1105
1106#[cfg(test)]
1107mod tests {
1108    use super::*;
1109
1110    #[test]
1111    fn test_union_find_basic() {
1112        let mut uf = UnionFind::new(5);
1113
1114        // Initially all separate
1115        assert_eq!(uf.find(0), Some(0));
1116        assert_eq!(uf.find(1), Some(1));
1117
1118        // Union 0 and 1
1119        assert!(uf.union(0, 1));
1120        assert_eq!(uf.find(0), uf.find(1));
1121
1122        // Union 2 and 3
1123        assert!(uf.union(2, 3));
1124        assert_eq!(uf.find(2), uf.find(3));
1125
1126        // Different components
1127        assert_ne!(uf.find(0), uf.find(2));
1128
1129        // Union the two components
1130        assert!(uf.union(1, 3));
1131        assert_eq!(uf.find(0), uf.find(3));
1132    }
1133
1134    #[test]
1135    fn test_union_find_path_compression() {
1136        let mut uf = UnionFind::new(10);
1137
1138        // Create a chain: 0 -> 1 -> 2 -> 3 -> 4
1139        for i in 0..4 {
1140            uf.union(i, i + 1);
1141        }
1142
1143        // After find with path compression, all should point to root
1144        let root = uf.find(0).unwrap();
1145        for i in 0..5 {
1146            assert_eq!(uf.find(i), Some(root));
1147        }
1148    }
1149
1150    #[test]
1151    fn test_union_find_count_components() {
1152        let mut uf = UnionFind::new(6);
1153
1154        // Create two components: {0, 1, 2} and {3, 4, 5}
1155        uf.union(0, 1);
1156        uf.union(1, 2);
1157        uf.union(3, 4);
1158        uf.union(4, 5);
1159
1160        assert_eq!(uf.count_components(6), 2);
1161    }
1162
1163    #[test]
1164    fn test_is_dunder() {
1165        assert!(is_dunder("__init__"));
1166        assert!(is_dunder("__str__"));
1167        assert!(is_dunder("__eq__"));
1168        assert!(!is_dunder("_private"));
1169        assert!(!is_dunder("__private"));
1170        assert!(!is_dunder("public__"));
1171        assert!(!is_dunder("normal"));
1172    }
1173
1174    #[test]
1175    fn test_compute_summary() {
1176        let classes = vec![
1177            ClassCohesion {
1178                class_name: "A".to_string(),
1179                file_path: "test.py".to_string(),
1180                line: 1,
1181                lcom4: 1,
1182                method_count: 3,
1183                field_count: 2,
1184                verdict: CohesionVerdict::Cohesive,
1185                split_suggestion: None,
1186                components: vec![],
1187            },
1188            ClassCohesion {
1189                class_name: "B".to_string(),
1190                file_path: "test.py".to_string(),
1191                line: 10,
1192                lcom4: 2,
1193                method_count: 4,
1194                field_count: 3,
1195                verdict: CohesionVerdict::SplitCandidate,
1196                split_suggestion: Some("Split B".to_string()),
1197                components: vec![],
1198            },
1199        ];
1200
1201        let summary = compute_summary(&classes);
1202        assert_eq!(summary.total_classes, 2);
1203        assert_eq!(summary.cohesive, 1);
1204        assert_eq!(summary.split_candidates, 1);
1205        assert!((summary.avg_lcom4 - 1.5).abs() < 0.01);
1206    }
1207
1208    #[test]
1209    fn test_generate_split_suggestion() {
1210        let components = vec![
1211            ComponentInfo {
1212                methods: vec!["method_a".to_string(), "method_b".to_string()],
1213                fields: vec!["field_x".to_string()],
1214            },
1215            ComponentInfo {
1216                methods: vec!["method_c".to_string()],
1217                fields: vec!["field_y".to_string()],
1218            },
1219        ];
1220
1221        let suggestion = generate_split_suggestion("MyClass", &components);
1222        assert!(suggestion.contains("MyClass"));
1223        assert!(suggestion.contains("2 classes"));
1224        assert!(suggestion.contains("method_a"));
1225        assert!(suggestion.contains("method_c"));
1226    }
1227
1228    // =========================================================================
1229    // format_cohesion_text tests
1230    // =========================================================================
1231
1232    /// Helper to build a ClassCohesion for tests.
1233    fn make_class(
1234        name: &str,
1235        location: (&str, u32),
1236        lcom4: u32,
1237        methods: u32,
1238        fields: u32,
1239        components: Vec<ComponentInfo>,
1240        suggestion: Option<&str>,
1241    ) -> ClassCohesion {
1242        let (file, line) = location;
1243        ClassCohesion {
1244            class_name: name.to_string(),
1245            file_path: file.to_string(),
1246            line,
1247            lcom4,
1248            method_count: methods,
1249            field_count: fields,
1250            verdict: CohesionVerdict::from_lcom4(lcom4),
1251            split_suggestion: suggestion.map(|s| s.to_string()),
1252            components,
1253        }
1254    }
1255
1256    #[test]
1257    fn test_format_cohesion_text_sorts_worst_first() {
1258        let report = CohesionReport {
1259            classes: vec![
1260                make_class("Low", ("src/a.py", 1), 2, 3, 2, vec![], None),
1261                make_class("High", ("src/b.py", 5), 5, 8, 6, vec![], None),
1262                make_class("Mid", ("src/c.py", 10), 3, 5, 4, vec![], None),
1263            ],
1264            summary: CohesionSummary {
1265                total_classes: 3,
1266                cohesive: 0,
1267                split_candidates: 3,
1268                avg_lcom4: 3.33,
1269            },
1270        };
1271        let text = format_cohesion_text(&report);
1272        // "High" (LCOM4=5) should appear before "Mid" (3) before "Low" (2)
1273        let high_pos = text.find("High").expect("High not found");
1274        let mid_pos = text.find("Mid").expect("Mid not found");
1275        let low_pos = text.find("Low").expect("Low not found");
1276        assert!(
1277            high_pos < mid_pos,
1278            "High (LCOM4=5) should appear before Mid (LCOM4=3)"
1279        );
1280        assert!(
1281            mid_pos < low_pos,
1282            "Mid (LCOM4=3) should appear before Low (LCOM4=2)"
1283        );
1284    }
1285
1286    #[test]
1287    fn test_format_cohesion_text_filters_cohesive_classes() {
1288        let report = CohesionReport {
1289            classes: vec![
1290                make_class("Cohesive", ("src/a.py", 1), 1, 3, 2, vec![], None),
1291                make_class("NeedsSplit", ("src/b.py", 5), 3, 6, 4, vec![], None),
1292            ],
1293            summary: CohesionSummary {
1294                total_classes: 2,
1295                cohesive: 1,
1296                split_candidates: 1,
1297                avg_lcom4: 2.0,
1298            },
1299        };
1300        let text = format_cohesion_text(&report);
1301        // Cohesive class (LCOM4=1) should NOT appear in the table rows
1302        // but NeedsSplit (LCOM4=3) should appear
1303        assert!(
1304            !text.contains("Cohesive"),
1305            "Cohesive classes should be filtered out"
1306        );
1307        assert!(
1308            text.contains("NeedsSplit"),
1309            "Split candidates should appear"
1310        );
1311    }
1312
1313    #[test]
1314    fn test_format_cohesion_text_limits_to_30() {
1315        // Create 35 split candidates
1316        let classes: Vec<ClassCohesion> = (0..35)
1317            .map(|i| {
1318                make_class(
1319                    &format!("Class{}", i),
1320                    (&format!("src/mod{}.py", i), i + 1),
1321                    2,
1322                    4,
1323                    3,
1324                    vec![],
1325                    None,
1326                )
1327            })
1328            .collect();
1329        let report = CohesionReport {
1330            classes,
1331            summary: CohesionSummary {
1332                total_classes: 35,
1333                cohesive: 0,
1334                split_candidates: 35,
1335                avg_lcom4: 2.0,
1336            },
1337        };
1338        let text = format_cohesion_text(&report);
1339        assert!(
1340            text.contains("and 5 more"),
1341            "Should show overflow message for remaining 5 classes"
1342        );
1343    }
1344
1345    #[test]
1346    fn test_format_cohesion_text_strips_common_path_prefix() {
1347        let report = CohesionReport {
1348            classes: vec![
1349                make_class("A", ("src/models/user.py", 1), 3, 5, 4, vec![], None),
1350                make_class("B", ("src/models/order.py", 10), 2, 4, 3, vec![], None),
1351            ],
1352            summary: CohesionSummary {
1353                total_classes: 2,
1354                cohesive: 0,
1355                split_candidates: 2,
1356                avg_lcom4: 2.5,
1357            },
1358        };
1359        let text = format_cohesion_text(&report);
1360        // The common prefix "src/models/" should be stripped, showing just filenames
1361        assert!(
1362            text.contains("user.py"),
1363            "Should display stripped path: user.py"
1364        );
1365        assert!(
1366            text.contains("order.py"),
1367            "Should display stripped path: order.py"
1368        );
1369        // Full path should not appear
1370        assert!(
1371            !text.contains("src/models/user.py"),
1372            "Full path should be stripped"
1373        );
1374    }
1375
1376    #[test]
1377    fn test_format_cohesion_text_has_header() {
1378        let report = CohesionReport {
1379            classes: vec![make_class("A", ("src/a.py", 1), 2, 3, 2, vec![], None)],
1380            summary: CohesionSummary {
1381                total_classes: 1,
1382                cohesive: 0,
1383                split_candidates: 1,
1384                avg_lcom4: 2.0,
1385            },
1386        };
1387        let text = format_cohesion_text(&report);
1388        assert!(
1389            text.contains("Cohesion Analysis"),
1390            "Should have title header"
1391        );
1392        assert!(
1393            text.contains("LCOM4") && text.contains("Methods") && text.contains("Fields"),
1394            "Should have column headers"
1395        );
1396        assert!(
1397            text.contains("Class") && text.contains("File"),
1398            "Should have Class and File columns"
1399        );
1400    }
1401
1402    #[test]
1403    fn test_format_cohesion_text_summary_line() {
1404        let report = CohesionReport {
1405            classes: vec![],
1406            summary: CohesionSummary {
1407                total_classes: 47,
1408                cohesive: 35,
1409                split_candidates: 12,
1410                avg_lcom4: 1.82,
1411            },
1412        };
1413        let text = format_cohesion_text(&report);
1414        assert!(
1415            text.contains("47 classes"),
1416            "Summary should show total classes"
1417        );
1418        assert!(
1419            text.contains("12 split candidates"),
1420            "Summary should show split candidate count"
1421        );
1422        assert!(text.contains("1.82"), "Summary should show avg LCOM4");
1423    }
1424
1425    #[test]
1426    fn test_format_cohesion_text_shows_components() {
1427        let components = vec![
1428            ComponentInfo {
1429                methods: vec!["create".to_string(), "update".to_string()],
1430                fields: vec!["db".to_string(), "cache".to_string()],
1431            },
1432            ComponentInfo {
1433                methods: vec!["send_email".to_string()],
1434                fields: vec!["mailer".to_string()],
1435            },
1436        ];
1437        let report = CohesionReport {
1438            classes: vec![make_class(
1439                "UserManager",
1440                ("src/user.py", 1),
1441                2,
1442                3,
1443                3,
1444                components,
1445                Some("Split into 2 focused classes"),
1446            )],
1447            summary: CohesionSummary {
1448                total_classes: 1,
1449                cohesive: 0,
1450                split_candidates: 1,
1451                avg_lcom4: 2.0,
1452            },
1453        };
1454        let text = format_cohesion_text(&report);
1455        // Should show component info
1456        assert!(text.contains("Component 1"), "Should show Component 1");
1457        assert!(
1458            text.contains("create") && text.contains("update"),
1459            "Should show methods in component"
1460        );
1461        assert!(
1462            text.contains("db") && text.contains("cache"),
1463            "Should show fields in component"
1464        );
1465        assert!(text.contains("Component 2"), "Should show Component 2");
1466        assert!(
1467            text.contains("send_email"),
1468            "Should show methods in component 2"
1469        );
1470        // Should show suggestion
1471        assert!(
1472            text.contains("Split into 2 focused classes"),
1473            "Should show split suggestion"
1474        );
1475    }
1476
1477    #[test]
1478    fn test_format_cohesion_text_empty_report() {
1479        let report = CohesionReport {
1480            classes: vec![],
1481            summary: CohesionSummary {
1482                total_classes: 0,
1483                cohesive: 0,
1484                split_candidates: 0,
1485                avg_lcom4: 0.0,
1486            },
1487        };
1488        let text = format_cohesion_text(&report);
1489        assert!(
1490            text.contains("No split candidates"),
1491            "Empty report should show 'No split candidates' message"
1492        );
1493    }
1494
1495    #[test]
1496    fn test_format_cohesion_text_all_cohesive() {
1497        let report = CohesionReport {
1498            classes: vec![
1499                make_class("Good1", ("src/a.py", 1), 1, 5, 3, vec![], None),
1500                make_class("Good2", ("src/b.py", 10), 1, 4, 2, vec![], None),
1501            ],
1502            summary: CohesionSummary {
1503                total_classes: 2,
1504                cohesive: 2,
1505                split_candidates: 0,
1506                avg_lcom4: 1.0,
1507            },
1508        };
1509        let text = format_cohesion_text(&report);
1510        // All classes are cohesive, so no table rows should appear
1511        assert!(
1512            text.contains("No split candidates"),
1513            "All-cohesive report should show 'No split candidates'"
1514        );
1515    }
1516
1517    #[test]
1518    fn test_cohesion_args_lang_flag() {
1519        // Verify CohesionArgs has a lang field of type Option<Language>
1520        let args = CohesionArgs {
1521            path: PathBuf::from("src/"),
1522            min_methods: 2,
1523            include_dunder: false,
1524            output_format: OutputFormat::Json,
1525            timeout: 30,
1526            project_root: None,
1527            lang: Some(Language::Rust),
1528        };
1529        assert_eq!(args.lang, Some(Language::Rust));
1530
1531        // Also test None case (auto-detect)
1532        let args_auto = CohesionArgs {
1533            path: PathBuf::from("src/"),
1534            min_methods: 2,
1535            include_dunder: false,
1536            output_format: OutputFormat::Json,
1537            timeout: 30,
1538            project_root: None,
1539            lang: None,
1540        };
1541        assert_eq!(args_auto.lang, None);
1542    }
1543}