Skip to main content

tldr_cli/commands/patterns/
cohesion.rs

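//! Cohesion command - LCOM4 (Lack of Cohesion of Methods) analysis for classes.
//!
//! Python files are analyzed by the tree-sitter-based implementation in this module;
//! files in other recognized languages are delegated to `tldr_core::quality::cohesion`.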
2//!
3//! LCOM4 measures class cohesion by counting connected components in the method-field graph:
4//! - LCOM4 = 1: All methods are connected (cohesive class)
5//! - LCOM4 > 1: Methods form disconnected groups (split candidate)
6//!
7//! # Algorithm
8//!
9//! 1. Parse class, extract methods and field accesses (`self.x`)
10//! 2. Build bipartite graph: methods <-> fields they access
11//! 3. Add edges for intra-class method calls (`self.method()`)
12//! 4. Count connected components via union-find with path compression
13//!
14//! # TIGER Mitigations
15//!
16//! - **T06**: Union-find with path compression AND union by rank
17//! - **E01**: `--timeout` flag (default 30s)
18//! - **E04**: `MAX_METHODS_PER_CLASS` and `MAX_FIELDS_PER_CLASS` limits
19//! - **E05**: `MAX_ITERATIONS` for union-find operations
20//!
21//! # Example
22//!
23//! ```bash
24//! tldr cohesion src/models.py
25//! tldr cohesion src/models.py --min-methods 3 --include-dunder
26//! tldr cohesion src/ --format text
27//! ```
28
29use std::collections::{HashMap, HashSet};
30use std::path::{Path, PathBuf};
31use std::time::{Duration, Instant};
32
33use anyhow::Result;
34use clap::{Args, ValueEnum};
35use colored::Colorize;
36use serde::{Deserialize, Serialize};
37use tldr_core::walker::walk_project;
38use tree_sitter::{Node, Parser, Tree};
39use tree_sitter_python::LANGUAGE as PYTHON_LANGUAGE;
40
41use tldr_core::quality::cohesion as core_cohesion;
42use tldr_core::types::Language;
43
44use crate::output::{common_path_prefix, strip_prefix_display, OutputFormat as GlobalOutputFormat};
45
46use super::error::{PatternsError, PatternsResult};
47use super::types::{
48    ClassCohesion, CohesionReport, CohesionSummary, CohesionVerdict, ComponentInfo,
49};
50use super::validation::{
51    read_file_safe, validate_directory_path, validate_file_path, validate_file_path_in_project,
52    MAX_CLASSES_PER_FILE, MAX_DIRECTORY_FILES, MAX_FIELDS_PER_CLASS, MAX_METHODS_PER_CLASS,
53};
54
55// =============================================================================
56// Constants (TIGER/ELEPHANT Mitigations)
57// =============================================================================
58
/// Maximum union-find iterations to prevent infinite loops (E05)
///
/// Every `UnionFind` built with `UnionFind::new` uses this value as its
/// `max_iterations` budget. The counter it bounds is never reset, so the
/// budget is cumulative over all `find`/`union` calls made on one instance.
const MAX_UNION_FIND_ITERATIONS: usize = 10_000;

/// Default timeout in seconds (E01)
///
/// Used as the default value of the `--timeout` flag on `CohesionArgs`.
const DEFAULT_TIMEOUT_SECS: u64 = 30;
64
65// =============================================================================
66// Output Format
67// =============================================================================
68
/// Output format for cohesion command
// NOTE: the `///` comments on this enum and its variants are left verbatim
// because the `ValueEnum` derive may surface them as CLI help text.
//
// This is the command-local format selector behind the hidden
// `--output-format` flag. `CohesionArgs::run` renders text when either this
// value or the global output format is `Text`, and JSON otherwise.
// Serde serializes the variants in lowercase (`"json"`, `"text"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, ValueEnum, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum OutputFormat {
    /// JSON output (default)
    #[default]
    Json,
    /// Human-readable text output
    Text,
}
79
80// =============================================================================
81// CLI Arguments
82// =============================================================================
83
/// Compute LCOM4 (Lack of Cohesion of Methods) metric for Python classes.
///
/// LCOM4 measures class cohesion by counting connected components in the
/// method-field bipartite graph. A cohesive class has LCOM4 = 1, while
/// a class with LCOM4 > 1 is a candidate for splitting.
///
/// # Example
///
/// ```bash
/// tldr cohesion src/models.py
/// tldr cohesion src/models.py --min-methods 3
/// tldr cohesion src/ --format text
/// ```
// NOTE: `///` comments on this struct and its fields are consumed by the clap
// `Args` derive as help text, so they are kept verbatim; maintainer notes
// below use plain `//` comments instead.
#[derive(Debug, Args)]
pub struct CohesionArgs {
    /// File or directory to analyze
    // `run` validates this path and then branches on `is_dir()` to choose
    // between directory and single-file analysis.
    pub path: PathBuf,

    /// Minimum number of instance methods for a class to be included in analysis.
    /// Classes with fewer methods are filtered from results. For Rust and Go,
    /// only instance methods (with self/receiver) are counted, not associated
    /// functions like new() or default().
    // Python path: compared against the number of methods extracted with
    // dunders included (see `analyze_class`). Other languages: compared
    // against the `method_count` reported by the core analyzer.
    #[arg(long, default_value = "1")]
    pub min_methods: u32,

    /// Include dunder methods (__init__, __str__, etc.) in analysis
    // Only read by the Python implementation (`analyze_class`); it is not
    // forwarded to the core multi-language analyzer.
    #[arg(long)]
    pub include_dunder: bool,

    /// Output format (json or text). Prefer global --format/-f flag.
    // Hidden flag. `run` prints text if either this or the global format is
    // `Text`, so a `json` value here cannot override a global `text`.
    #[arg(
        long = "output-format",
        alias = "output",
        short = 'o',
        hide = true,
        value_enum,
        default_value = "json"
    )]
    pub output_format: OutputFormat,

    /// Analysis timeout in seconds
    // Checked once per walked entry in `analyze_directory`; the single-file
    // path in `run` does not receive the timeout.
    #[arg(long, default_value_t = DEFAULT_TIMEOUT_SECS)]
    pub timeout: u64,

    /// Project root for path validation (optional)
    // When set, `run` validates `path` with `validate_file_path_in_project`
    // instead of `validate_file_path`.
    #[arg(long)]
    pub project_root: Option<PathBuf>,

    /// Language filter (auto-detected if omitted)
    // NOTE(review): this field is not read anywhere in the visible part of
    // this file; language is always derived from the file path. Confirm
    // whether the filter is applied elsewhere.
    #[arg(long, short = 'l')]
    pub lang: Option<Language>,
}
136
137impl CohesionArgs {
138    /// Run the cohesion analysis command
139    pub fn run(&self, global_format: GlobalOutputFormat) -> Result<()> {
140        let start = Instant::now();
141        let timeout = Duration::from_secs(self.timeout);
142
143        // Validate path
144        let canonical_path = if let Some(ref root) = self.project_root {
145            validate_file_path_in_project(&self.path, root)?
146        } else {
147            validate_file_path(&self.path)?
148        };
149
150        // Analyze based on path type
151        let report = if canonical_path.is_dir() {
152            analyze_directory(&canonical_path, self, start, timeout)?
153        } else {
154            analyze_single_file(&canonical_path, self)?
155        };
156
157        // Resolve format: global -f flag takes priority over hidden --output-format
158        let use_text = matches!(global_format, GlobalOutputFormat::Text)
159            || matches!(self.output_format, OutputFormat::Text);
160
161        // Output based on format
162        if use_text {
163            let text = format_cohesion_text(&report);
164            println!("{}", text);
165        } else {
166            let json = serde_json::to_string_pretty(&report)?;
167            println!("{}", json);
168        }
169
170        Ok(())
171    }
172}
173
174// =============================================================================
175// Union-Find with Path Compression (TIGER-06)
176// =============================================================================
177
178/// Union-Find (Disjoint Set Union) data structure with path compression and union by rank.
179///
180/// This implementation uses both optimizations to achieve near-O(1) amortized time per operation:
181/// - **Path compression**: Flatten tree during `find` operations
182/// - **Union by rank**: Attach smaller tree under root of larger tree
183///
184/// # TIGER-06 Mitigation
185///
186/// Path compression prevents worst-case O(n) find operations.
187/// Union by rank keeps trees balanced.
188#[derive(Debug, Clone)]
189pub struct UnionFind {
190    /// Parent pointers (index -> parent index)
191    parent: Vec<usize>,
192    /// Rank for union by rank optimization
193    rank: Vec<usize>,
194    /// Iteration counter to prevent infinite loops (E05)
195    iterations: usize,
196    /// Maximum allowed iterations
197    max_iterations: usize,
198}
199
200impl UnionFind {
201    /// Create a new union-find structure with n elements
202    pub fn new(n: usize) -> Self {
203        Self {
204            parent: (0..n).collect(),
205            rank: vec![0; n],
206            iterations: 0,
207            max_iterations: MAX_UNION_FIND_ITERATIONS,
208        }
209    }
210
211    /// Find the root of the set containing x, with path compression.
212    ///
213    /// Returns None if max iterations exceeded.
214    pub fn find(&mut self, x: usize) -> Option<usize> {
215        if x >= self.parent.len() {
216            return None;
217        }
218
219        // Find root
220        let mut root = x;
221        while self.parent[root] != root {
222            self.iterations += 1;
223            if self.iterations > self.max_iterations {
224                return None; // Exceeded iteration limit
225            }
226            root = self.parent[root];
227        }
228
229        // Path compression: point all nodes on path directly to root
230        let mut current = x;
231        while self.parent[current] != root {
232            self.iterations += 1;
233            if self.iterations > self.max_iterations {
234                return None;
235            }
236            let next = self.parent[current];
237            self.parent[current] = root;
238            current = next;
239        }
240
241        Some(root)
242    }
243
244    /// Union the sets containing x and y, using union by rank.
245    ///
246    /// Returns true if a union was performed, false if already in same set or error.
247    pub fn union(&mut self, x: usize, y: usize) -> bool {
248        let root_x = match self.find(x) {
249            Some(r) => r,
250            None => return false,
251        };
252        let root_y = match self.find(y) {
253            Some(r) => r,
254            None => return false,
255        };
256
257        if root_x == root_y {
258            return false; // Already in same set
259        }
260
261        // Union by rank: attach smaller tree under root of larger tree
262        match self.rank[root_x].cmp(&self.rank[root_y]) {
263            std::cmp::Ordering::Less => {
264                self.parent[root_x] = root_y;
265            }
266            std::cmp::Ordering::Greater => {
267                self.parent[root_y] = root_x;
268            }
269            std::cmp::Ordering::Equal => {
270                self.parent[root_y] = root_x;
271                self.rank[root_x] += 1;
272            }
273        }
274
275        true
276    }
277
278    /// Count the number of unique connected components.
279    ///
280    /// Only counts components for the first `method_count` elements (ignoring fields
281    /// that are only connected to a single method).
282    pub fn count_components(&mut self, method_count: usize) -> usize {
283        let mut roots = HashSet::new();
284        for i in 0..method_count.min(self.parent.len()) {
285            if let Some(root) = self.find(i) {
286                roots.insert(root);
287            }
288        }
289        roots.len()
290    }
291
292    /// Get components as groups of indices.
293    pub fn get_components(&mut self) -> HashMap<usize, Vec<usize>> {
294        let mut components: HashMap<usize, Vec<usize>> = HashMap::new();
295        for i in 0..self.parent.len() {
296            if let Some(root) = self.find(i) {
297                components.entry(root).or_default().push(i);
298            }
299        }
300        components
301    }
302
303    /// Check if iteration limit was exceeded
304    pub fn limit_exceeded(&self) -> bool {
305        self.iterations > self.max_iterations
306    }
307}
308
309// =============================================================================
310// Method Analysis
311// =============================================================================
312
/// Analysis result for a single method
///
/// Produced by `extract_methods` for each method it keeps, and consumed by
/// `analyze_class` / `compute_lcom4` to build the method-field graph.
#[derive(Debug, Clone)]
struct MethodAnalysis {
    /// Method name
    name: String,
    /// Fields accessed by this method (self.x)
    ///
    /// Sorted and deduplicated by `extract_field_accesses`. Every
    /// `<receiver>.<attr>` attribute node is recorded, which includes the
    /// callee of `self.method()` calls; `analyze_class` later drops names that
    /// match a method of the class.
    field_accesses: Vec<String>,
    /// Other methods called (self.method())
    ///
    /// Sorted and deduplicated by `extract_method_calls`. May contain names
    /// that are not methods of the class; `compute_lcom4` ignores those.
    method_calls: Vec<String>,
}
323
324// =============================================================================
325// Core Analysis Functions
326// =============================================================================
327
328/// Analyze a single file for class cohesion.
329///
330/// For Python files, uses the CLI's own Python-specific implementation.
331/// For all other supported languages (Java, TypeScript, Go, Rust, etc.),
332/// delegates to the core library's multi-language analyzer.
333fn analyze_single_file(path: &Path, args: &CohesionArgs) -> PatternsResult<CohesionReport> {
334    let lang = Language::from_path(path);
335
336    // For non-Python languages, delegate to the core multi-language analyzer
337    if lang != Some(Language::Python) && lang.is_some() {
338        return analyze_single_file_core(path, args);
339    }
340
341    // Python: use the CLI's existing Python-specific implementation
342    let source = read_file_safe(path)?;
343    let tree = parse_python(&source, path)?;
344    let classes = analyze_file_ast(&tree, &source, path, args)?;
345
346    let summary = compute_summary(&classes);
347
348    Ok(CohesionReport { classes, summary })
349}
350
351/// Analyze a single non-Python file using the core library.
352fn analyze_single_file_core(path: &Path, args: &CohesionArgs) -> PatternsResult<CohesionReport> {
353    let threshold = 2;
354    let core_report = core_cohesion::analyze_cohesion(path, None, threshold).map_err(|e| {
355        PatternsError::ParseError {
356            file: path.to_path_buf(),
357            message: format!("Core cohesion analysis failed: {}", e),
358        }
359    })?;
360
361    // Convert core types to CLI types
362    let classes: Vec<ClassCohesion> = core_report
363        .classes
364        .into_iter()
365        .filter(|c| c.method_count >= args.min_methods as usize)
366        .map(|c| ClassCohesion {
367            class_name: c.name,
368            file_path: c.file.display().to_string(),
369            line: c.line as u32,
370            lcom4: c.lcom4 as u32,
371            method_count: c.method_count as u32,
372            field_count: c.field_count as u32,
373            verdict: match c.verdict {
374                core_cohesion::CohesionVerdict::Cohesive => CohesionVerdict::Cohesive,
375                core_cohesion::CohesionVerdict::SplitCandidate => CohesionVerdict::SplitCandidate,
376            },
377            split_suggestion: c.split_suggestion,
378            components: c
379                .components
380                .into_iter()
381                .map(|comp| ComponentInfo {
382                    methods: comp.methods,
383                    fields: comp.fields,
384                })
385                .collect(),
386        })
387        .collect();
388
389    let summary = compute_summary(&classes);
390    Ok(CohesionReport { classes, summary })
391}
392
393/// Analyze a directory of source files for class cohesion.
394///
395/// Supports Python, Java, TypeScript, JavaScript, Go, Rust, and other
396/// languages supported by the core library.
397fn analyze_directory(
398    dir: &Path,
399    args: &CohesionArgs,
400    start: Instant,
401    timeout: Duration,
402) -> PatternsResult<CohesionReport> {
403    validate_directory_path(dir)?;
404
405    let mut all_classes = Vec::new();
406    let mut file_count = 0u32;
407
408    for entry in walk_project(dir) {
409        // Check timeout
410        if start.elapsed() > timeout {
411            return Err(PatternsError::Timeout {
412                timeout_secs: args.timeout,
413            });
414        }
415
416        // Check file limit
417        if file_count >= MAX_DIRECTORY_FILES {
418            return Err(PatternsError::TooManyFiles {
419                count: file_count,
420                max_files: MAX_DIRECTORY_FILES,
421            });
422        }
423
424        let path = entry.path();
425
426        // Analyze files with recognized language extensions
427        if path.is_file() && Language::from_path(path).is_some() {
428            file_count += 1;
429
430            // Skip test files unless explicitly included
431            let filename = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
432            if filename.starts_with("test_") || filename.ends_with("_test.py") {
433                continue;
434            }
435
436            // Analyze file, collecting errors but continuing
437            match analyze_single_file(path, args) {
438                Ok(report) => {
439                    all_classes.extend(report.classes);
440                }
441                Err(_) => {
442                    // Skip files with parse errors
443                    continue;
444                }
445            }
446        }
447    }
448
449    let summary = compute_summary(&all_classes);
450
451    Ok(CohesionReport {
452        classes: all_classes,
453        summary,
454    })
455}
456
457/// Parse Python source code with tree-sitter.
458fn parse_python(source: &str, file: &Path) -> PatternsResult<Tree> {
459    let mut parser = Parser::new();
460    parser
461        .set_language(&PYTHON_LANGUAGE.into())
462        .map_err(|e| PatternsError::ParseError {
463            file: file.to_path_buf(),
464            message: format!("Failed to set Python language: {}", e),
465        })?;
466
467    parser
468        .parse(source, None)
469        .ok_or_else(|| PatternsError::ParseError {
470            file: file.to_path_buf(),
471            message: "Parsing returned None".to_string(),
472        })
473}
474
475/// Analyze all classes in a parsed Python file.
476fn analyze_file_ast(
477    tree: &Tree,
478    source: &str,
479    file: &Path,
480    args: &CohesionArgs,
481) -> PatternsResult<Vec<ClassCohesion>> {
482    let root = tree.root_node();
483    let source_bytes = source.as_bytes();
484    let mut results = Vec::new();
485    let mut class_count = 0;
486
487    let mut cursor = root.walk();
488    for child in root.children(&mut cursor) {
489        if child.kind() == "class_definition" {
490            class_count += 1;
491            if class_count > MAX_CLASSES_PER_FILE {
492                break; // Limit exceeded
493            }
494
495            if let Some(cohesion) = analyze_class(child, source_bytes, file, args)? {
496                results.push(cohesion);
497            }
498        }
499    }
500
501    Ok(results)
502}
503
504/// Analyze a single class for LCOM4 cohesion.
505fn analyze_class(
506    class_node: Node,
507    source: &[u8],
508    file: &Path,
509    args: &CohesionArgs,
510) -> PatternsResult<Option<ClassCohesion>> {
511    // Get class name
512    let class_name = class_node
513        .child_by_field_name("name")
514        .map(|n| get_node_text(n, source))
515        .unwrap_or("<unknown>")
516        .to_string();
517
518    let line = class_node.start_position().row as u32 + 1;
519
520    // Get class body
521    let body = match class_node.child_by_field_name("body") {
522        Some(b) => b,
523        None => return Ok(None),
524    };
525
526    // Extract methods
527    let methods = extract_methods(body, source, args.include_dunder)?;
528
529    // Filter by min_methods threshold (use all methods for threshold)
530    let all_methods = extract_methods(body, source, true)?;
531    if all_methods.len() < args.min_methods as usize {
532        return Ok(None);
533    }
534
535    // Check method limit (E04)
536    if methods.len() > MAX_METHODS_PER_CLASS {
537        return Ok(Some(ClassCohesion {
538            class_name,
539            file_path: file.display().to_string(),
540            line,
541            lcom4: 0,
542            method_count: methods.len() as u32,
543            field_count: 0,
544            verdict: CohesionVerdict::Cohesive,
545            split_suggestion: Some("Class exceeds MAX_METHODS_PER_CLASS limit".to_string()),
546            components: vec![],
547        }));
548    }
549
550    // Collect all unique fields
551    let mut all_fields: HashSet<String> = HashSet::new();
552    let method_names: HashSet<&str> = methods.iter().map(|m| m.name.as_str()).collect();
553
554    for method in &methods {
555        for field in &method.field_accesses {
556            // Don't count method names as fields
557            if !method_names.contains(field.as_str()) {
558                all_fields.insert(field.clone());
559            }
560        }
561    }
562
563    // Check field limit (E04)
564    if all_fields.len() > MAX_FIELDS_PER_CLASS {
565        return Ok(Some(ClassCohesion {
566            class_name,
567            file_path: file.display().to_string(),
568            line,
569            lcom4: 0,
570            method_count: methods.len() as u32,
571            field_count: all_fields.len() as u32,
572            verdict: CohesionVerdict::Cohesive,
573            split_suggestion: Some("Class exceeds MAX_FIELDS_PER_CLASS limit".to_string()),
574            components: vec![],
575        }));
576    }
577
578    let fields: Vec<String> = all_fields.into_iter().collect();
579
580    // Compute LCOM4
581    let (lcom4, components) = compute_lcom4(&methods, &fields, &method_names);
582
583    // Determine verdict
584    let verdict = CohesionVerdict::from_lcom4(lcom4);
585
586    // Generate split suggestion if needed
587    let split_suggestion = if lcom4 > 1 {
588        Some(generate_split_suggestion(&class_name, &components))
589    } else {
590        None
591    };
592
593    Ok(Some(ClassCohesion {
594        class_name,
595        file_path: file.display().to_string(),
596        line,
597        lcom4,
598        method_count: methods.len() as u32,
599        field_count: fields.len() as u32,
600        verdict,
601        split_suggestion,
602        components,
603    }))
604}
605
606/// Extract methods from a class body.
607fn extract_methods(
608    body: Node,
609    source: &[u8],
610    include_dunder: bool,
611) -> PatternsResult<Vec<MethodAnalysis>> {
612    let mut methods = Vec::new();
613    let mut cursor = body.walk();
614
615    for child in body.children(&mut cursor) {
616        // Handle both sync and async function definitions
617        if child.kind() == "function_definition" || child.kind() == "async_function_definition" {
618            // Get method name
619            let name = child
620                .child_by_field_name("name")
621                .map(|n| get_node_text(n, source))
622                .unwrap_or("")
623                .to_string();
624
625            // Skip static methods and class methods
626            if is_static_or_classmethod(&child, source) {
627                continue;
628            }
629
630            // Filter dunder methods
631            if !include_dunder && is_dunder(&name) {
632                continue;
633            }
634
635            // Extract field accesses (self.x)
636            let field_accesses = extract_field_accesses(child, source);
637
638            // Extract method calls (self.method())
639            let method_calls = extract_method_calls(child, source);
640
641            methods.push(MethodAnalysis {
642                name,
643                field_accesses,
644                method_calls,
645            });
646        }
647    }
648
649    Ok(methods)
650}
651
/// Check if a method is a dunder method (`__xxx__`).
///
/// A name qualifies when it starts and ends with a double underscore *and*
/// has at least one character in between. The length guard stops the prefix
/// and suffix from overlapping, so degenerate names such as `__`, `___` and
/// `____` are not treated as dunder methods.
fn is_dunder(name: &str) -> bool {
    name.len() > 4 && name.starts_with("__") && name.ends_with("__")
}
656
657/// Check if a method is decorated with @staticmethod or @classmethod
658fn is_static_or_classmethod(node: &Node, source: &[u8]) -> bool {
659    let mut cursor = node.walk();
660    for child in node.children(&mut cursor) {
661        if child.kind() == "decorator" {
662            let text = get_node_text(child, source);
663            if text.contains("staticmethod") || text.contains("classmethod") {
664                return true;
665            }
666        }
667    }
668    false
669}
670
671/// Extract field accesses (self.x) from a method.
672fn extract_field_accesses(method: Node, source: &[u8]) -> Vec<String> {
673    let mut fields = Vec::new();
674    let self_name = get_self_param_name(method, source);
675
676    extract_field_accesses_recursive(method, source, &self_name, &mut fields);
677
678    fields.sort();
679    fields.dedup();
680    fields
681}
682
683fn extract_field_accesses_recursive(
684    node: Node,
685    source: &[u8],
686    self_name: &str,
687    fields: &mut Vec<String>,
688) {
689    // Check if this is a self.x attribute access
690    if node.kind() == "attribute" {
691        if let Some(obj) = node.child_by_field_name("object") {
692            if obj.kind() == "identifier" && get_node_text(obj, source) == self_name {
693                if let Some(attr) = node.child_by_field_name("attribute") {
694                    let attr_name = get_node_text(attr, source);
695                    fields.push(attr_name.to_string());
696                }
697            }
698        }
699    }
700
701    // Recurse into children
702    let mut cursor = node.walk();
703    for child in node.children(&mut cursor) {
704        extract_field_accesses_recursive(child, source, self_name, fields);
705    }
706}
707
708/// Extract method calls (self.method()) from a method.
709fn extract_method_calls(method: Node, source: &[u8]) -> Vec<String> {
710    let mut calls = Vec::new();
711    let self_name = get_self_param_name(method, source);
712
713    extract_method_calls_recursive(method, source, &self_name, &mut calls);
714
715    calls.sort();
716    calls.dedup();
717    calls
718}
719
720fn extract_method_calls_recursive(
721    node: Node,
722    source: &[u8],
723    self_name: &str,
724    calls: &mut Vec<String>,
725) {
726    // Check if this is a self.method() call
727    if node.kind() == "call" {
728        if let Some(func) = node.child_by_field_name("function") {
729            if func.kind() == "attribute" {
730                if let Some(obj) = func.child_by_field_name("object") {
731                    if obj.kind() == "identifier" && get_node_text(obj, source) == self_name {
732                        if let Some(attr) = func.child_by_field_name("attribute") {
733                            let method_name = get_node_text(attr, source);
734                            calls.push(method_name.to_string());
735                        }
736                    }
737                }
738            }
739        }
740    }
741
742    // Recurse into children
743    let mut cursor = node.walk();
744    for child in node.children(&mut cursor) {
745        extract_method_calls_recursive(child, source, self_name, calls);
746    }
747}
748
749/// Get the name of the self parameter (usually "self" but could be different)
750fn get_self_param_name(method: Node, source: &[u8]) -> String {
751    if let Some(params) = method.child_by_field_name("parameters") {
752        let mut cursor = params.walk();
753        for child in params.children(&mut cursor) {
754            if child.kind() == "identifier" {
755                return get_node_text(child, source).to_string();
756            }
757        }
758    }
759    "self".to_string()
760}
761
762/// Compute LCOM4 using union-find.
763///
764/// # Returns
765/// (lcom4_value, connected_components)
766fn compute_lcom4(
767    methods: &[MethodAnalysis],
768    fields: &[String],
769    method_names: &HashSet<&str>,
770) -> (u32, Vec<ComponentInfo>) {
771    if methods.is_empty() {
772        return (0, vec![]);
773    }
774
775    // Create index mappings
776    let method_idx: HashMap<&str, usize> = methods
777        .iter()
778        .enumerate()
779        .map(|(i, m)| (m.name.as_str(), i))
780        .collect();
781
782    let field_idx: HashMap<&str, usize> = fields
783        .iter()
784        .enumerate()
785        .map(|(i, f)| (f.as_str(), methods.len() + i))
786        .collect();
787
788    // Initialize union-find
789    let mut uf = UnionFind::new(methods.len() + fields.len());
790
791    // Connect methods to fields they access
792    for (i, method) in methods.iter().enumerate() {
793        for field in &method.field_accesses {
794            if let Some(&fi) = field_idx.get(field.as_str()) {
795                uf.union(i, fi);
796            }
797        }
798    }
799
800    // Connect methods that call each other
801    for (i, method) in methods.iter().enumerate() {
802        for called in &method.method_calls {
803            if method_names.contains(called.as_str()) {
804                if let Some(&ci) = method_idx.get(called.as_str()) {
805                    uf.union(i, ci);
806                }
807            }
808        }
809    }
810
811    // Check if limit was exceeded
812    if uf.limit_exceeded() {
813        return (
814            0,
815            vec![ComponentInfo {
816                methods: vec!["<analysis incomplete>".to_string()],
817                fields: vec![],
818            }],
819        );
820    }
821
822    // Build component infos
823    let raw_components = uf.get_components();
824    let mut component_infos: Vec<ComponentInfo> = Vec::new();
825
826    for (_, members) in raw_components {
827        let mut ci = ComponentInfo {
828            methods: Vec::new(),
829            fields: Vec::new(),
830        };
831
832        for member_idx in members {
833            if member_idx < methods.len() {
834                ci.methods.push(methods[member_idx].name.clone());
835            } else {
836                let field_pos = member_idx - methods.len();
837                if field_pos < fields.len() {
838                    ci.fields.push(fields[field_pos].clone());
839                }
840            }
841        }
842
843        // Only include components that have at least one method
844        if !ci.methods.is_empty() {
845            ci.methods.sort();
846            ci.fields.sort();
847            component_infos.push(ci);
848        }
849    }
850
851    // Sort components by first method name for deterministic output
852    component_infos.sort_by(|a, b| a.methods.first().cmp(&b.methods.first()));
853
854    let lcom4 = component_infos.len() as u32;
855    (lcom4.max(1), component_infos) // LCOM4 is at least 1 if there are methods
856}
857
858/// Generate a split suggestion for a class with LCOM4 > 1.
859fn generate_split_suggestion(class_name: &str, components: &[ComponentInfo]) -> String {
860    if components.is_empty() {
861        return format!("Consider splitting {} into multiple classes", class_name);
862    }
863
864    let parts: Vec<String> = components
865        .iter()
866        .map(|c| {
867            let methods_str = c.methods.join(", ");
868            format!("[{}]", methods_str)
869        })
870        .collect();
871
872    format!(
873        "Consider splitting {} into {} classes: {}",
874        class_name,
875        components.len(),
876        parts.join(" + ")
877    )
878}
879
880/// Compute summary statistics for a set of class cohesion results.
881fn compute_summary(classes: &[ClassCohesion]) -> CohesionSummary {
882    let total = classes.len() as u32;
883    if total == 0 {
884        return CohesionSummary::default();
885    }
886
887    let cohesive = classes
888        .iter()
889        .filter(|c| c.verdict == CohesionVerdict::Cohesive)
890        .count() as u32;
891
892    let split_candidates = total - cohesive;
893
894    let avg_lcom4 = classes.iter().map(|c| c.lcom4 as f64).sum::<f64>() / total as f64;
895
896    CohesionSummary {
897        total_classes: total,
898        cohesive,
899        split_candidates,
900        avg_lcom4: (avg_lcom4 * 100.0).round() / 100.0, // Round to 2 decimal places
901    }
902}
903
904/// Get text content of a node.
905fn get_node_text<'a>(node: Node<'a>, source: &'a [u8]) -> &'a str {
906    let start = node.start_byte();
907    let end = node.end_byte();
908    if end <= source.len() {
909        std::str::from_utf8(&source[start..end]).unwrap_or("")
910    } else {
911        ""
912    }
913}
914
915// =============================================================================
916// Text Formatting
917// =============================================================================
918
919/// Format a cohesion report as human-readable text.
920///
921/// Shows split candidate classes sorted worst-first (highest LCOM4), with
922/// color-coded severity, path stripping, component details, and split suggestions.
923/// Top 30 entries shown by default with overflow message.
924///
925/// ```text
926/// Cohesion Analysis (LCOM4)
927///
928/// LCOM4  Methods  Fields  Class                         File
929///     4        8       6  UserManager                   models/user.py:42
930///     |-- Component 1: create, update [db, cache]
931///     |-- Component 2: send_email [mailer]
932///     `-- Suggestion: Split into 4 focused classes
933///     3        6       4  OrderProcessor                services/order.py:15
934///     |-- Component 1: process, submit [queue]
935///     `-- Suggestion: Split into 3 focused classes
936///
937/// Summary: 47 classes, 12 split candidates (25.5%), avg LCOM4: 1.82
938/// ```
939pub fn format_cohesion_text(report: &CohesionReport) -> String {
940    let mut output = String::new();
941
942    let s = &report.summary;
943    output.push_str(&format!(
944        "Cohesion Analysis (LCOM4) ({} classes, {} split candidates)\n\n",
945        s.total_classes, s.split_candidates
946    ));
947
948    // Filter to split candidates only (LCOM4 > 1) and sort worst-first
949    let mut candidates: Vec<&ClassCohesion> = report
950        .classes
951        .iter()
952        .filter(|c| c.verdict == CohesionVerdict::SplitCandidate)
953        .collect();
954    candidates.sort_by(|a, b| b.lcom4.cmp(&a.lcom4));
955
956    if candidates.is_empty() {
957        output.push_str("  No split candidates found.\n\n");
958        output.push_str(&format_cohesion_summary(s));
959        return output;
960    }
961
962    // Compute common path prefix for relative display
963    let paths: Vec<&Path> = candidates
964        .iter()
965        .filter_map(|c| Path::new(c.file_path.as_str()).parent())
966        .collect();
967    let prefix = if paths.is_empty() {
968        std::path::PathBuf::new()
969    } else {
970        common_path_prefix(&paths)
971    };
972
973    // Header
974    output.push_str(&format!(
975        " {:>5}  {:>7}  {:>6}  {:<28}  {}\n",
976        "LCOM4", "Methods", "Fields", "Class", "File"
977    ));
978
979    // Show top 30
980    let limit = candidates.len().min(30);
981    for class in candidates.iter().take(limit) {
982        let rel = strip_prefix_display(Path::new(&class.file_path), &prefix);
983        let lcom4_str = format_lcom4_colored(class.lcom4);
984
985        // Truncate class name to 28 chars
986        let name = if class.class_name.len() > 28 {
987            format!("{}...", &class.class_name[..25])
988        } else {
989            class.class_name.clone()
990        };
991
992        output.push_str(&format!(
993            " {:>5}  {:>7}  {:>6}  {:<28}  {}:{}\n",
994            lcom4_str, class.method_count, class.field_count, name, rel, class.line
995        ));
996
997        // Show component details for split candidates
998        if !class.components.is_empty() {
999            let comp_count = class.components.len();
1000            for (i, comp) in class.components.iter().enumerate() {
1001                let is_last = i == comp_count - 1 && class.split_suggestion.is_none();
1002                let connector = if is_last { "`--" } else { "|--" };
1003                let methods_str = comp.methods.join(", ");
1004                let fields_str = if comp.fields.is_empty() {
1005                    String::new()
1006                } else {
1007                    format!(" [{}]", comp.fields.join(", "))
1008                };
1009                output.push_str(&format!(
1010                    "     {}  Component {}: {}{}\n",
1011                    connector,
1012                    i + 1,
1013                    methods_str,
1014                    fields_str
1015                ));
1016            }
1017        }
1018
1019        // Show split suggestion
1020        if let Some(ref suggestion) = class.split_suggestion {
1021            output.push_str(&format!("     `--  Suggestion: {}\n", suggestion));
1022        }
1023    }
1024
1025    if candidates.len() > limit {
1026        output.push_str(&format!(
1027            "\n  ... and {} more split candidates\n",
1028            candidates.len() - limit
1029        ));
1030    }
1031
1032    output.push('\n');
1033    output.push_str(&format_cohesion_summary(s));
1034
1035    output
1036}
1037
1038/// Format LCOM4 value with color coding based on severity.
1039fn format_lcom4_colored(lcom4: u32) -> String {
1040    if lcom4 >= 4 {
1041        format!("{}", lcom4).red().bold().to_string()
1042    } else if lcom4 >= 2 {
1043        format!("{}", lcom4).yellow().to_string()
1044    } else {
1045        format!("{}", lcom4).green().to_string()
1046    }
1047}
1048
1049/// Format the cohesion summary line.
1050fn format_cohesion_summary(s: &CohesionSummary) -> String {
1051    let pct = if s.total_classes > 0 {
1052        (s.split_candidates as f64 / s.total_classes as f64) * 100.0
1053    } else {
1054        0.0
1055    };
1056    format!(
1057        "Summary: {} classes, {} split candidates ({:.1}%), avg LCOM4: {:.2}\n",
1058        s.total_classes, s.split_candidates, pct, s.avg_lcom4
1059    )
1060}
1061
1062// =============================================================================
1063// Public Entry Point
1064// =============================================================================
1065
1066/// Run cohesion analysis (for programmatic use).
1067pub fn run(args: CohesionArgs) -> Result<CohesionReport> {
1068    let start = Instant::now();
1069    let timeout = Duration::from_secs(args.timeout);
1070
1071    // Validate path
1072    let canonical_path = if let Some(ref root) = args.project_root {
1073        validate_file_path_in_project(&args.path, root)?
1074    } else {
1075        validate_file_path(&args.path)?
1076    };
1077
1078    // Analyze based on path type
1079    let report = if canonical_path.is_dir() {
1080        analyze_directory(&canonical_path, &args, start, timeout)?
1081    } else {
1082        analyze_single_file(&canonical_path, &args)?
1083    };
1084
1085    Ok(report)
1086}
1087
1088// =============================================================================
1089// Tests
1090// =============================================================================
1091
#[cfg(test)]
mod tests {
    use super::*;

    // =========================================================================
    // Shared fixtures
    // =========================================================================

    /// Build a `ComponentInfo` from borrowed method and field names.
    fn component(methods: &[&str], fields: &[&str]) -> ComponentInfo {
        fn owned(names: &[&str]) -> Vec<String> {
            names.iter().map(|name| (*name).to_string()).collect()
        }
        ComponentInfo {
            methods: owned(methods),
            fields: owned(fields),
        }
    }

    /// Build a `CohesionSummary` from its four counters.
    fn summary(
        total_classes: u32,
        cohesive: u32,
        split_candidates: u32,
        avg_lcom4: f64,
    ) -> CohesionSummary {
        CohesionSummary {
            total_classes,
            cohesive,
            split_candidates,
            avg_lcom4,
        }
    }

    /// Helper to build a ClassCohesion for tests; the verdict is derived from `lcom4`.
    fn make_class(
        name: &str,
        location: (&str, u32),
        lcom4: u32,
        methods: u32,
        fields: u32,
        components: Vec<ComponentInfo>,
        suggestion: Option<&str>,
    ) -> ClassCohesion {
        let (file, line) = location;
        ClassCohesion {
            class_name: name.to_string(),
            file_path: file.to_string(),
            line,
            lcom4,
            method_count: methods,
            field_count: fields,
            verdict: CohesionVerdict::from_lcom4(lcom4),
            split_suggestion: suggestion.map(|s| s.to_string()),
            components,
        }
    }

    /// `make_class` without component details or a split suggestion.
    fn bare_class(
        name: &str,
        location: (&str, u32),
        lcom4: u32,
        methods: u32,
        fields: u32,
    ) -> ClassCohesion {
        make_class(name, location, lcom4, methods, fields, vec![], None)
    }

    /// Wrap the parts into a `CohesionReport` and render it as text.
    fn render(classes: Vec<ClassCohesion>, summary: CohesionSummary) -> String {
        format_cohesion_text(&CohesionReport { classes, summary })
    }

    // =========================================================================
    // UnionFind tests
    // =========================================================================

    #[test]
    fn test_union_find_basic() {
        let mut uf = UnionFind::new(5);

        // Every element starts as its own root.
        assert_eq!(uf.find(0), Some(0));
        assert_eq!(uf.find(1), Some(1));

        // Merge {0, 1}.
        assert!(uf.union(0, 1));
        assert_eq!(uf.find(0), uf.find(1));

        // Merge {2, 3}.
        assert!(uf.union(2, 3));
        assert_eq!(uf.find(2), uf.find(3));

        // The two pairs are still distinct components.
        assert_ne!(uf.find(0), uf.find(2));

        // Joining one member of each pair merges everything.
        assert!(uf.union(1, 3));
        assert_eq!(uf.find(0), uf.find(3));
    }

    #[test]
    fn test_union_find_path_compression() {
        let mut uf = UnionFind::new(10);

        // Link 0-1, 1-2, 2-3, 3-4 into a single chain.
        for left in 0..4 {
            uf.union(left, left + 1);
        }

        // Every member of the chain must resolve to the same root.
        let root = uf.find(0).unwrap();
        for member in 0..5 {
            assert_eq!(uf.find(member), Some(root));
        }
    }

    #[test]
    fn test_union_find_count_components() {
        let mut uf = UnionFind::new(6);

        // Two components: {0, 1, 2} and {3, 4, 5}.
        for (a, b) in [(0, 1), (1, 2), (3, 4), (4, 5)] {
            uf.union(a, b);
        }

        assert_eq!(uf.count_components(6), 2);
    }

    // =========================================================================
    // Helper function tests
    // =========================================================================

    #[test]
    fn test_is_dunder() {
        for name in ["__init__", "__str__", "__eq__"] {
            assert!(is_dunder(name));
        }
        for name in ["_private", "__private", "public__", "normal"] {
            assert!(!is_dunder(name));
        }
    }

    #[test]
    fn test_compute_summary() {
        let cohesive_class = ClassCohesion {
            class_name: "A".to_string(),
            file_path: "test.py".to_string(),
            line: 1,
            lcom4: 1,
            method_count: 3,
            field_count: 2,
            verdict: CohesionVerdict::Cohesive,
            split_suggestion: None,
            components: vec![],
        };
        let split_class = ClassCohesion {
            class_name: "B".to_string(),
            file_path: "test.py".to_string(),
            line: 10,
            lcom4: 2,
            method_count: 4,
            field_count: 3,
            verdict: CohesionVerdict::SplitCandidate,
            split_suggestion: Some("Split B".to_string()),
            components: vec![],
        };

        let result = compute_summary(&[cohesive_class, split_class]);
        assert_eq!(result.total_classes, 2);
        assert_eq!(result.cohesive, 1);
        assert_eq!(result.split_candidates, 1);
        assert!((result.avg_lcom4 - 1.5).abs() < 0.01);
    }

    #[test]
    fn test_generate_split_suggestion() {
        let components = vec![
            component(&["method_a", "method_b"], &["field_x"]),
            component(&["method_c"], &["field_y"]),
        ];

        let suggestion = generate_split_suggestion("MyClass", &components);
        assert!(suggestion.contains("MyClass"));
        assert!(suggestion.contains("2 classes"));
        assert!(suggestion.contains("method_a"));
        assert!(suggestion.contains("method_c"));
    }

    // =========================================================================
    // format_cohesion_text tests
    // =========================================================================

    #[test]
    fn test_format_cohesion_text_sorts_worst_first() {
        let text = render(
            vec![
                bare_class("Low", ("src/a.py", 1), 2, 3, 2),
                bare_class("High", ("src/b.py", 5), 5, 8, 6),
                bare_class("Mid", ("src/c.py", 10), 3, 5, 4),
            ],
            summary(3, 0, 3, 3.33),
        );

        // Expected row order: High (5), then Mid (3), then Low (2).
        let high_pos = text.find("High").expect("High not found");
        let mid_pos = text.find("Mid").expect("Mid not found");
        let low_pos = text.find("Low").expect("Low not found");
        assert!(
            high_pos < mid_pos,
            "High (LCOM4=5) should appear before Mid (LCOM4=3)"
        );
        assert!(
            mid_pos < low_pos,
            "Mid (LCOM4=3) should appear before Low (LCOM4=2)"
        );
    }

    #[test]
    fn test_format_cohesion_text_filters_cohesive_classes() {
        let text = render(
            vec![
                bare_class("Cohesive", ("src/a.py", 1), 1, 3, 2),
                bare_class("NeedsSplit", ("src/b.py", 5), 3, 6, 4),
            ],
            summary(2, 1, 1, 2.0),
        );

        // The LCOM4=1 class must be absent from the rows; the LCOM4=3 class
        // must be listed.
        assert!(
            !text.contains("Cohesive"),
            "Cohesive classes should be filtered out"
        );
        assert!(
            text.contains("NeedsSplit"),
            "Split candidates should appear"
        );
    }

    #[test]
    fn test_format_cohesion_text_limits_to_30() {
        // 35 split candidates: five more than the row limit.
        let classes: Vec<ClassCohesion> = (0..35u32)
            .map(|i| {
                bare_class(
                    &format!("Class{}", i),
                    (&format!("src/mod{}.py", i), i + 1),
                    2,
                    4,
                    3,
                )
            })
            .collect();

        let text = render(classes, summary(35, 0, 35, 2.0));
        assert!(
            text.contains("and 5 more"),
            "Should show overflow message for remaining 5 classes"
        );
    }

    #[test]
    fn test_format_cohesion_text_strips_common_path_prefix() {
        let text = render(
            vec![
                bare_class("A", ("src/models/user.py", 1), 3, 5, 4),
                bare_class("B", ("src/models/order.py", 10), 2, 4, 3),
            ],
            summary(2, 0, 2, 2.5),
        );

        // With the shared "src/models/" prefix removed only file names remain.
        assert!(
            text.contains("user.py"),
            "Should display stripped path: user.py"
        );
        assert!(
            text.contains("order.py"),
            "Should display stripped path: order.py"
        );
        // The unstripped path must not leak into the output.
        assert!(
            !text.contains("src/models/user.py"),
            "Full path should be stripped"
        );
    }

    #[test]
    fn test_format_cohesion_text_has_header() {
        let text = render(
            vec![bare_class("A", ("src/a.py", 1), 2, 3, 2)],
            summary(1, 0, 1, 2.0),
        );

        assert!(
            text.contains("Cohesion Analysis"),
            "Should have title header"
        );
        assert!(
            text.contains("LCOM4") && text.contains("Methods") && text.contains("Fields"),
            "Should have column headers"
        );
        assert!(
            text.contains("Class") && text.contains("File"),
            "Should have Class and File columns"
        );
    }

    #[test]
    fn test_format_cohesion_text_summary_line() {
        let text = render(vec![], summary(47, 35, 12, 1.82));

        assert!(
            text.contains("47 classes"),
            "Summary should show total classes"
        );
        assert!(
            text.contains("12 split candidates"),
            "Summary should show split candidate count"
        );
        assert!(text.contains("1.82"), "Summary should show avg LCOM4");
    }

    #[test]
    fn test_format_cohesion_text_shows_components() {
        let components = vec![
            component(&["create", "update"], &["db", "cache"]),
            component(&["send_email"], &["mailer"]),
        ];
        let text = render(
            vec![make_class(
                "UserManager",
                ("src/user.py", 1),
                2,
                3,
                3,
                components,
                Some("Split into 2 focused classes"),
            )],
            summary(1, 0, 1, 2.0),
        );

        // Component details
        assert!(text.contains("Component 1"), "Should show Component 1");
        assert!(
            text.contains("create") && text.contains("update"),
            "Should show methods in component"
        );
        assert!(
            text.contains("db") && text.contains("cache"),
            "Should show fields in component"
        );
        assert!(text.contains("Component 2"), "Should show Component 2");
        assert!(
            text.contains("send_email"),
            "Should show methods in component 2"
        );
        // Split suggestion
        assert!(
            text.contains("Split into 2 focused classes"),
            "Should show split suggestion"
        );
    }

    #[test]
    fn test_format_cohesion_text_empty_report() {
        let text = render(vec![], summary(0, 0, 0, 0.0));

        assert!(
            text.contains("No split candidates"),
            "Empty report should show 'No split candidates' message"
        );
    }

    #[test]
    fn test_format_cohesion_text_all_cohesive() {
        let text = render(
            vec![
                bare_class("Good1", ("src/a.py", 1), 1, 5, 3),
                bare_class("Good2", ("src/b.py", 10), 1, 4, 2),
            ],
            summary(2, 2, 0, 1.0),
        );

        // Nothing qualifies as a split candidate, so no table rows are printed.
        assert!(
            text.contains("No split candidates"),
            "All-cohesive report should show 'No split candidates'"
        );
    }

    // =========================================================================
    // CohesionArgs tests
    // =========================================================================

    #[test]
    fn test_cohesion_args_lang_flag() {
        // Identical arguments apart from the `lang` field under test.
        let args_with = |lang: Option<Language>| CohesionArgs {
            path: PathBuf::from("src/"),
            min_methods: 2,
            include_dunder: false,
            output_format: OutputFormat::Json,
            timeout: 30,
            project_root: None,
            lang,
        };

        // Explicit language selection.
        let args = args_with(Some(Language::Rust));
        assert_eq!(args.lang, Some(Language::Rust));

        // `None` means auto-detect.
        let args_auto = args_with(None);
        assert_eq!(args_auto.lang, None);
    }
}