Skip to main content

tldr_cli/commands/patterns/
cohesion.rs

1//! Cohesion command - LCOM4 (Lack of Cohesion of Methods) analysis for Python classes.
2//!
3//! LCOM4 measures class cohesion by counting connected components in the method-field graph:
4//! - LCOM4 = 1: All methods are connected (cohesive class)
5//! - LCOM4 > 1: Methods form disconnected groups (split candidate)
6//!
7//! # Algorithm
8//!
9//! 1. Parse class, extract methods and field accesses (`self.x`)
10//! 2. Build bipartite graph: methods <-> fields they access
11//! 3. Add edges for intra-class method calls (`self.method()`)
12//! 4. Count connected components via union-find with path compression
13//!
14//! # TIGER Mitigations
15//!
16//! - **T06**: Union-find with path compression AND union by rank
17//! - **E01**: `--timeout` flag (default 30s)
18//! - **E04**: `MAX_METHODS_PER_CLASS` and `MAX_FIELDS_PER_CLASS` limits
19//! - **E05**: `MAX_ITERATIONS` for union-find operations
20//!
21//! # Example
22//!
23//! ```bash
24//! tldr cohesion src/models.py
25//! tldr cohesion src/models.py --min-methods 3 --include-dunder
26//! tldr cohesion src/ --format text
27//! ```
28
29use std::collections::{HashMap, HashSet};
30use std::path::{Path, PathBuf};
31use std::time::{Duration, Instant};
32
33use anyhow::Result;
34use clap::{Args, ValueEnum};
35use colored::Colorize;
36use serde::{Deserialize, Serialize};
37use tree_sitter::{Node, Parser, Tree};
38use tree_sitter_python::LANGUAGE as PYTHON_LANGUAGE;
39use walkdir::WalkDir;
40
41use tldr_core::quality::cohesion as core_cohesion;
42use tldr_core::types::Language;
43
44use crate::output::{common_path_prefix, strip_prefix_display, OutputFormat as GlobalOutputFormat};
45
46use super::error::{PatternsError, PatternsResult};
47use super::types::{
48    ClassCohesion, CohesionReport, CohesionSummary, CohesionVerdict, ComponentInfo,
49};
50use super::validation::{
51    read_file_safe, validate_directory_path, validate_file_path, validate_file_path_in_project,
52    MAX_CLASSES_PER_FILE, MAX_DIRECTORY_FILES, MAX_FIELDS_PER_CLASS, MAX_METHODS_PER_CLASS,
53};
54
55// =============================================================================
56// Constants (TIGER/ELEPHANT Mitigations)
57// =============================================================================
58
59/// Maximum union-find iterations to prevent infinite loops (E05)
60const MAX_UNION_FIND_ITERATIONS: usize = 10_000;
61
62/// Default timeout in seconds (E01)
63const DEFAULT_TIMEOUT_SECS: u64 = 30;
64
65// =============================================================================
66// Output Format
67// =============================================================================
68
69/// Output format for cohesion command
70#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, ValueEnum, Serialize, Deserialize)]
71#[serde(rename_all = "lowercase")]
72pub enum OutputFormat {
73    /// JSON output (default)
74    #[default]
75    Json,
76    /// Human-readable text output
77    Text,
78}
79
80// =============================================================================
81// CLI Arguments
82// =============================================================================
83
84/// Compute LCOM4 (Lack of Cohesion of Methods) metric for Python classes.
85///
86/// LCOM4 measures class cohesion by counting connected components in the
87/// method-field bipartite graph. A cohesive class has LCOM4 = 1, while
88/// a class with LCOM4 > 1 is a candidate for splitting.
89///
90/// # Example
91///
92/// ```bash
93/// tldr cohesion src/models.py
94/// tldr cohesion src/models.py --min-methods 3
95/// tldr cohesion src/ --format text
96/// ```
97#[derive(Debug, Args)]
98pub struct CohesionArgs {
99    /// File or directory to analyze
100    pub path: PathBuf,
101
102    /// Minimum number of instance methods for a class to be included in analysis.
103    /// Classes with fewer methods are filtered from results. For Rust and Go,
104    /// only instance methods (with self/receiver) are counted, not associated
105    /// functions like new() or default().
106    #[arg(long, default_value = "1")]
107    pub min_methods: u32,
108
109    /// Include dunder methods (__init__, __str__, etc.) in analysis
110    #[arg(long)]
111    pub include_dunder: bool,
112
113    /// Output format (json or text). Prefer global --format/-f flag.
114    #[arg(
115        long = "output-format",
116        alias = "output",
117        short = 'o',
118        hide = true,
119        value_enum,
120        default_value = "json"
121    )]
122    pub output_format: OutputFormat,
123
124    /// Analysis timeout in seconds
125    #[arg(long, default_value_t = DEFAULT_TIMEOUT_SECS)]
126    pub timeout: u64,
127
128    /// Project root for path validation (optional)
129    #[arg(long)]
130    pub project_root: Option<PathBuf>,
131
132    /// Language filter (auto-detected if omitted)
133    #[arg(long, short = 'l')]
134    pub lang: Option<Language>,
135}
136
137impl CohesionArgs {
138    /// Run the cohesion analysis command
139    pub fn run(&self, global_format: GlobalOutputFormat) -> Result<()> {
140        let start = Instant::now();
141        let timeout = Duration::from_secs(self.timeout);
142
143        // Validate path
144        let canonical_path = if let Some(ref root) = self.project_root {
145            validate_file_path_in_project(&self.path, root)?
146        } else {
147            validate_file_path(&self.path)?
148        };
149
150        // Analyze based on path type
151        let report = if canonical_path.is_dir() {
152            analyze_directory(&canonical_path, self, start, timeout)?
153        } else {
154            analyze_single_file(&canonical_path, self)?
155        };
156
157        // Resolve format: global -f flag takes priority over hidden --output-format
158        let use_text = matches!(global_format, GlobalOutputFormat::Text)
159            || matches!(self.output_format, OutputFormat::Text);
160
161        // Output based on format
162        if use_text {
163            let text = format_cohesion_text(&report);
164            println!("{}", text);
165        } else {
166            let json = serde_json::to_string_pretty(&report)?;
167            println!("{}", json);
168        }
169
170        Ok(())
171    }
172}
173
174// =============================================================================
175// Union-Find with Path Compression (TIGER-06)
176// =============================================================================
177
178/// Union-Find (Disjoint Set Union) data structure with path compression and union by rank.
179///
180/// This implementation uses both optimizations to achieve near-O(1) amortized time per operation:
181/// - **Path compression**: Flatten tree during `find` operations
182/// - **Union by rank**: Attach smaller tree under root of larger tree
183///
184/// # TIGER-06 Mitigation
185///
186/// Path compression prevents worst-case O(n) find operations.
187/// Union by rank keeps trees balanced.
188#[derive(Debug, Clone)]
189pub struct UnionFind {
190    /// Parent pointers (index -> parent index)
191    parent: Vec<usize>,
192    /// Rank for union by rank optimization
193    rank: Vec<usize>,
194    /// Iteration counter to prevent infinite loops (E05)
195    iterations: usize,
196    /// Maximum allowed iterations
197    max_iterations: usize,
198}
199
200impl UnionFind {
201    /// Create a new union-find structure with n elements
202    pub fn new(n: usize) -> Self {
203        Self {
204            parent: (0..n).collect(),
205            rank: vec![0; n],
206            iterations: 0,
207            max_iterations: MAX_UNION_FIND_ITERATIONS,
208        }
209    }
210
211    /// Find the root of the set containing x, with path compression.
212    ///
213    /// Returns None if max iterations exceeded.
214    pub fn find(&mut self, x: usize) -> Option<usize> {
215        if x >= self.parent.len() {
216            return None;
217        }
218
219        // Find root
220        let mut root = x;
221        while self.parent[root] != root {
222            self.iterations += 1;
223            if self.iterations > self.max_iterations {
224                return None; // Exceeded iteration limit
225            }
226            root = self.parent[root];
227        }
228
229        // Path compression: point all nodes on path directly to root
230        let mut current = x;
231        while self.parent[current] != root {
232            self.iterations += 1;
233            if self.iterations > self.max_iterations {
234                return None;
235            }
236            let next = self.parent[current];
237            self.parent[current] = root;
238            current = next;
239        }
240
241        Some(root)
242    }
243
244    /// Union the sets containing x and y, using union by rank.
245    ///
246    /// Returns true if a union was performed, false if already in same set or error.
247    pub fn union(&mut self, x: usize, y: usize) -> bool {
248        let root_x = match self.find(x) {
249            Some(r) => r,
250            None => return false,
251        };
252        let root_y = match self.find(y) {
253            Some(r) => r,
254            None => return false,
255        };
256
257        if root_x == root_y {
258            return false; // Already in same set
259        }
260
261        // Union by rank: attach smaller tree under root of larger tree
262        match self.rank[root_x].cmp(&self.rank[root_y]) {
263            std::cmp::Ordering::Less => {
264                self.parent[root_x] = root_y;
265            }
266            std::cmp::Ordering::Greater => {
267                self.parent[root_y] = root_x;
268            }
269            std::cmp::Ordering::Equal => {
270                self.parent[root_y] = root_x;
271                self.rank[root_x] += 1;
272            }
273        }
274
275        true
276    }
277
278    /// Count the number of unique connected components.
279    ///
280    /// Only counts components for the first `method_count` elements (ignoring fields
281    /// that are only connected to a single method).
282    pub fn count_components(&mut self, method_count: usize) -> usize {
283        let mut roots = HashSet::new();
284        for i in 0..method_count.min(self.parent.len()) {
285            if let Some(root) = self.find(i) {
286                roots.insert(root);
287            }
288        }
289        roots.len()
290    }
291
292    /// Get components as groups of indices.
293    pub fn get_components(&mut self) -> HashMap<usize, Vec<usize>> {
294        let mut components: HashMap<usize, Vec<usize>> = HashMap::new();
295        for i in 0..self.parent.len() {
296            if let Some(root) = self.find(i) {
297                components.entry(root).or_default().push(i);
298            }
299        }
300        components
301    }
302
303    /// Check if iteration limit was exceeded
304    pub fn limit_exceeded(&self) -> bool {
305        self.iterations > self.max_iterations
306    }
307}
308
309// =============================================================================
310// Method Analysis
311// =============================================================================
312
313/// Analysis result for a single method
314#[derive(Debug, Clone)]
315struct MethodAnalysis {
316    /// Method name
317    name: String,
318    /// Fields accessed by this method (self.x)
319    field_accesses: Vec<String>,
320    /// Other methods called (self.method())
321    method_calls: Vec<String>,
322}
323
324// =============================================================================
325// Core Analysis Functions
326// =============================================================================
327
328/// Analyze a single file for class cohesion.
329///
330/// For Python files, uses the CLI's own Python-specific implementation.
331/// For all other supported languages (Java, TypeScript, Go, Rust, etc.),
332/// delegates to the core library's multi-language analyzer.
333fn analyze_single_file(path: &Path, args: &CohesionArgs) -> PatternsResult<CohesionReport> {
334    let lang = Language::from_path(path);
335
336    // For non-Python languages, delegate to the core multi-language analyzer
337    if lang != Some(Language::Python) && lang.is_some() {
338        return analyze_single_file_core(path, args);
339    }
340
341    // Python: use the CLI's existing Python-specific implementation
342    let source = read_file_safe(path)?;
343    let tree = parse_python(&source, path)?;
344    let classes = analyze_file_ast(&tree, &source, path, args)?;
345
346    let summary = compute_summary(&classes);
347
348    Ok(CohesionReport { classes, summary })
349}
350
351/// Analyze a single non-Python file using the core library.
352fn analyze_single_file_core(path: &Path, args: &CohesionArgs) -> PatternsResult<CohesionReport> {
353    let threshold = 2;
354    let core_report = core_cohesion::analyze_cohesion(path, None, threshold).map_err(|e| {
355        PatternsError::ParseError {
356            file: path.to_path_buf(),
357            message: format!("Core cohesion analysis failed: {}", e),
358        }
359    })?;
360
361    // Convert core types to CLI types
362    let classes: Vec<ClassCohesion> = core_report
363        .classes
364        .into_iter()
365        .filter(|c| c.method_count >= args.min_methods as usize)
366        .map(|c| ClassCohesion {
367            class_name: c.name,
368            file_path: c.file.display().to_string(),
369            line: c.line as u32,
370            lcom4: c.lcom4 as u32,
371            method_count: c.method_count as u32,
372            field_count: c.field_count as u32,
373            verdict: match c.verdict {
374                core_cohesion::CohesionVerdict::Cohesive => CohesionVerdict::Cohesive,
375                core_cohesion::CohesionVerdict::SplitCandidate => CohesionVerdict::SplitCandidate,
376            },
377            split_suggestion: c.split_suggestion,
378            components: c
379                .components
380                .into_iter()
381                .map(|comp| ComponentInfo {
382                    methods: comp.methods,
383                    fields: comp.fields,
384                })
385                .collect(),
386        })
387        .collect();
388
389    let summary = compute_summary(&classes);
390    Ok(CohesionReport { classes, summary })
391}
392
393/// Analyze a directory of source files for class cohesion.
394///
395/// Supports Python, Java, TypeScript, JavaScript, Go, Rust, and other
396/// languages supported by the core library.
397fn analyze_directory(
398    dir: &Path,
399    args: &CohesionArgs,
400    start: Instant,
401    timeout: Duration,
402) -> PatternsResult<CohesionReport> {
403    validate_directory_path(dir)?;
404
405    let mut all_classes = Vec::new();
406    let mut file_count = 0u32;
407
408    for entry in WalkDir::new(dir)
409        .follow_links(false)
410        .into_iter()
411        .filter_map(|e| e.ok())
412    {
413        // Check timeout
414        if start.elapsed() > timeout {
415            return Err(PatternsError::Timeout {
416                timeout_secs: args.timeout,
417            });
418        }
419
420        // Check file limit
421        if file_count >= MAX_DIRECTORY_FILES {
422            return Err(PatternsError::TooManyFiles {
423                count: file_count,
424                max_files: MAX_DIRECTORY_FILES,
425            });
426        }
427
428        let path = entry.path();
429
430        // Analyze files with recognized language extensions
431        if path.is_file() && Language::from_path(path).is_some() {
432            file_count += 1;
433
434            // Skip test files unless explicitly included
435            let filename = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
436            if filename.starts_with("test_") || filename.ends_with("_test.py") {
437                continue;
438            }
439
440            // Analyze file, collecting errors but continuing
441            match analyze_single_file(path, args) {
442                Ok(report) => {
443                    all_classes.extend(report.classes);
444                }
445                Err(_) => {
446                    // Skip files with parse errors
447                    continue;
448                }
449            }
450        }
451    }
452
453    let summary = compute_summary(&all_classes);
454
455    Ok(CohesionReport {
456        classes: all_classes,
457        summary,
458    })
459}
460
461/// Parse Python source code with tree-sitter.
462fn parse_python(source: &str, file: &Path) -> PatternsResult<Tree> {
463    let mut parser = Parser::new();
464    parser
465        .set_language(&PYTHON_LANGUAGE.into())
466        .map_err(|e| PatternsError::ParseError {
467            file: file.to_path_buf(),
468            message: format!("Failed to set Python language: {}", e),
469        })?;
470
471    parser
472        .parse(source, None)
473        .ok_or_else(|| PatternsError::ParseError {
474            file: file.to_path_buf(),
475            message: "Parsing returned None".to_string(),
476        })
477}
478
479/// Analyze all classes in a parsed Python file.
480fn analyze_file_ast(
481    tree: &Tree,
482    source: &str,
483    file: &Path,
484    args: &CohesionArgs,
485) -> PatternsResult<Vec<ClassCohesion>> {
486    let root = tree.root_node();
487    let source_bytes = source.as_bytes();
488    let mut results = Vec::new();
489    let mut class_count = 0;
490
491    let mut cursor = root.walk();
492    for child in root.children(&mut cursor) {
493        if child.kind() == "class_definition" {
494            class_count += 1;
495            if class_count > MAX_CLASSES_PER_FILE {
496                break; // Limit exceeded
497            }
498
499            if let Some(cohesion) = analyze_class(child, source_bytes, file, args)? {
500                results.push(cohesion);
501            }
502        }
503    }
504
505    Ok(results)
506}
507
508/// Analyze a single class for LCOM4 cohesion.
509fn analyze_class(
510    class_node: Node,
511    source: &[u8],
512    file: &Path,
513    args: &CohesionArgs,
514) -> PatternsResult<Option<ClassCohesion>> {
515    // Get class name
516    let class_name = class_node
517        .child_by_field_name("name")
518        .map(|n| get_node_text(n, source))
519        .unwrap_or("<unknown>")
520        .to_string();
521
522    let line = class_node.start_position().row as u32 + 1;
523
524    // Get class body
525    let body = match class_node.child_by_field_name("body") {
526        Some(b) => b,
527        None => return Ok(None),
528    };
529
530    // Extract methods
531    let methods = extract_methods(body, source, args.include_dunder)?;
532
533    // Filter by min_methods threshold (use all methods for threshold)
534    let all_methods = extract_methods(body, source, true)?;
535    if all_methods.len() < args.min_methods as usize {
536        return Ok(None);
537    }
538
539    // Check method limit (E04)
540    if methods.len() > MAX_METHODS_PER_CLASS {
541        return Ok(Some(ClassCohesion {
542            class_name,
543            file_path: file.display().to_string(),
544            line,
545            lcom4: 0,
546            method_count: methods.len() as u32,
547            field_count: 0,
548            verdict: CohesionVerdict::Cohesive,
549            split_suggestion: Some("Class exceeds MAX_METHODS_PER_CLASS limit".to_string()),
550            components: vec![],
551        }));
552    }
553
554    // Collect all unique fields
555    let mut all_fields: HashSet<String> = HashSet::new();
556    let method_names: HashSet<&str> = methods.iter().map(|m| m.name.as_str()).collect();
557
558    for method in &methods {
559        for field in &method.field_accesses {
560            // Don't count method names as fields
561            if !method_names.contains(field.as_str()) {
562                all_fields.insert(field.clone());
563            }
564        }
565    }
566
567    // Check field limit (E04)
568    if all_fields.len() > MAX_FIELDS_PER_CLASS {
569        return Ok(Some(ClassCohesion {
570            class_name,
571            file_path: file.display().to_string(),
572            line,
573            lcom4: 0,
574            method_count: methods.len() as u32,
575            field_count: all_fields.len() as u32,
576            verdict: CohesionVerdict::Cohesive,
577            split_suggestion: Some("Class exceeds MAX_FIELDS_PER_CLASS limit".to_string()),
578            components: vec![],
579        }));
580    }
581
582    let fields: Vec<String> = all_fields.into_iter().collect();
583
584    // Compute LCOM4
585    let (lcom4, components) = compute_lcom4(&methods, &fields, &method_names);
586
587    // Determine verdict
588    let verdict = CohesionVerdict::from_lcom4(lcom4);
589
590    // Generate split suggestion if needed
591    let split_suggestion = if lcom4 > 1 {
592        Some(generate_split_suggestion(&class_name, &components))
593    } else {
594        None
595    };
596
597    Ok(Some(ClassCohesion {
598        class_name,
599        file_path: file.display().to_string(),
600        line,
601        lcom4,
602        method_count: methods.len() as u32,
603        field_count: fields.len() as u32,
604        verdict,
605        split_suggestion,
606        components,
607    }))
608}
609
610/// Extract methods from a class body.
611fn extract_methods(
612    body: Node,
613    source: &[u8],
614    include_dunder: bool,
615) -> PatternsResult<Vec<MethodAnalysis>> {
616    let mut methods = Vec::new();
617    let mut cursor = body.walk();
618
619    for child in body.children(&mut cursor) {
620        // Handle both sync and async function definitions
621        if child.kind() == "function_definition" || child.kind() == "async_function_definition" {
622            // Get method name
623            let name = child
624                .child_by_field_name("name")
625                .map(|n| get_node_text(n, source))
626                .unwrap_or("")
627                .to_string();
628
629            // Skip static methods and class methods
630            if is_static_or_classmethod(&child, source) {
631                continue;
632            }
633
634            // Filter dunder methods
635            if !include_dunder && is_dunder(&name) {
636                continue;
637            }
638
639            // Extract field accesses (self.x)
640            let field_accesses = extract_field_accesses(child, source);
641
642            // Extract method calls (self.method())
643            let method_calls = extract_method_calls(child, source);
644
645            methods.push(MethodAnalysis {
646                name,
647                field_accesses,
648                method_calls,
649            });
650        }
651    }
652
653    Ok(methods)
654}
655
656/// Check if a method is a dunder method (__xxx__)
657fn is_dunder(name: &str) -> bool {
658    name.starts_with("__") && name.ends_with("__")
659}
660
661/// Check if a method is decorated with @staticmethod or @classmethod
662fn is_static_or_classmethod(node: &Node, source: &[u8]) -> bool {
663    let mut cursor = node.walk();
664    for child in node.children(&mut cursor) {
665        if child.kind() == "decorator" {
666            let text = get_node_text(child, source);
667            if text.contains("staticmethod") || text.contains("classmethod") {
668                return true;
669            }
670        }
671    }
672    false
673}
674
675/// Extract field accesses (self.x) from a method.
676fn extract_field_accesses(method: Node, source: &[u8]) -> Vec<String> {
677    let mut fields = Vec::new();
678    let self_name = get_self_param_name(method, source);
679
680    extract_field_accesses_recursive(method, source, &self_name, &mut fields);
681
682    fields.sort();
683    fields.dedup();
684    fields
685}
686
687fn extract_field_accesses_recursive(
688    node: Node,
689    source: &[u8],
690    self_name: &str,
691    fields: &mut Vec<String>,
692) {
693    // Check if this is a self.x attribute access
694    if node.kind() == "attribute" {
695        if let Some(obj) = node.child_by_field_name("object") {
696            if obj.kind() == "identifier" && get_node_text(obj, source) == self_name {
697                if let Some(attr) = node.child_by_field_name("attribute") {
698                    let attr_name = get_node_text(attr, source);
699                    fields.push(attr_name.to_string());
700                }
701            }
702        }
703    }
704
705    // Recurse into children
706    let mut cursor = node.walk();
707    for child in node.children(&mut cursor) {
708        extract_field_accesses_recursive(child, source, self_name, fields);
709    }
710}
711
712/// Extract method calls (self.method()) from a method.
713fn extract_method_calls(method: Node, source: &[u8]) -> Vec<String> {
714    let mut calls = Vec::new();
715    let self_name = get_self_param_name(method, source);
716
717    extract_method_calls_recursive(method, source, &self_name, &mut calls);
718
719    calls.sort();
720    calls.dedup();
721    calls
722}
723
724fn extract_method_calls_recursive(
725    node: Node,
726    source: &[u8],
727    self_name: &str,
728    calls: &mut Vec<String>,
729) {
730    // Check if this is a self.method() call
731    if node.kind() == "call" {
732        if let Some(func) = node.child_by_field_name("function") {
733            if func.kind() == "attribute" {
734                if let Some(obj) = func.child_by_field_name("object") {
735                    if obj.kind() == "identifier" && get_node_text(obj, source) == self_name {
736                        if let Some(attr) = func.child_by_field_name("attribute") {
737                            let method_name = get_node_text(attr, source);
738                            calls.push(method_name.to_string());
739                        }
740                    }
741                }
742            }
743        }
744    }
745
746    // Recurse into children
747    let mut cursor = node.walk();
748    for child in node.children(&mut cursor) {
749        extract_method_calls_recursive(child, source, self_name, calls);
750    }
751}
752
753/// Get the name of the self parameter (usually "self" but could be different)
754fn get_self_param_name(method: Node, source: &[u8]) -> String {
755    if let Some(params) = method.child_by_field_name("parameters") {
756        let mut cursor = params.walk();
757        for child in params.children(&mut cursor) {
758            if child.kind() == "identifier" {
759                return get_node_text(child, source).to_string();
760            }
761        }
762    }
763    "self".to_string()
764}
765
766/// Compute LCOM4 using union-find.
767///
768/// # Returns
769/// (lcom4_value, connected_components)
770fn compute_lcom4(
771    methods: &[MethodAnalysis],
772    fields: &[String],
773    method_names: &HashSet<&str>,
774) -> (u32, Vec<ComponentInfo>) {
775    if methods.is_empty() {
776        return (0, vec![]);
777    }
778
779    // Create index mappings
780    let method_idx: HashMap<&str, usize> = methods
781        .iter()
782        .enumerate()
783        .map(|(i, m)| (m.name.as_str(), i))
784        .collect();
785
786    let field_idx: HashMap<&str, usize> = fields
787        .iter()
788        .enumerate()
789        .map(|(i, f)| (f.as_str(), methods.len() + i))
790        .collect();
791
792    // Initialize union-find
793    let mut uf = UnionFind::new(methods.len() + fields.len());
794
795    // Connect methods to fields they access
796    for (i, method) in methods.iter().enumerate() {
797        for field in &method.field_accesses {
798            if let Some(&fi) = field_idx.get(field.as_str()) {
799                uf.union(i, fi);
800            }
801        }
802    }
803
804    // Connect methods that call each other
805    for (i, method) in methods.iter().enumerate() {
806        for called in &method.method_calls {
807            if method_names.contains(called.as_str()) {
808                if let Some(&ci) = method_idx.get(called.as_str()) {
809                    uf.union(i, ci);
810                }
811            }
812        }
813    }
814
815    // Check if limit was exceeded
816    if uf.limit_exceeded() {
817        return (
818            0,
819            vec![ComponentInfo {
820                methods: vec!["<analysis incomplete>".to_string()],
821                fields: vec![],
822            }],
823        );
824    }
825
826    // Build component infos
827    let raw_components = uf.get_components();
828    let mut component_infos: Vec<ComponentInfo> = Vec::new();
829
830    for (_, members) in raw_components {
831        let mut ci = ComponentInfo {
832            methods: Vec::new(),
833            fields: Vec::new(),
834        };
835
836        for member_idx in members {
837            if member_idx < methods.len() {
838                ci.methods.push(methods[member_idx].name.clone());
839            } else {
840                let field_pos = member_idx - methods.len();
841                if field_pos < fields.len() {
842                    ci.fields.push(fields[field_pos].clone());
843                }
844            }
845        }
846
847        // Only include components that have at least one method
848        if !ci.methods.is_empty() {
849            ci.methods.sort();
850            ci.fields.sort();
851            component_infos.push(ci);
852        }
853    }
854
855    // Sort components by first method name for deterministic output
856    component_infos.sort_by(|a, b| a.methods.first().cmp(&b.methods.first()));
857
858    let lcom4 = component_infos.len() as u32;
859    (lcom4.max(1), component_infos) // LCOM4 is at least 1 if there are methods
860}
861
862/// Generate a split suggestion for a class with LCOM4 > 1.
863fn generate_split_suggestion(class_name: &str, components: &[ComponentInfo]) -> String {
864    if components.is_empty() {
865        return format!("Consider splitting {} into multiple classes", class_name);
866    }
867
868    let parts: Vec<String> = components
869        .iter()
870        .map(|c| {
871            let methods_str = c.methods.join(", ");
872            format!("[{}]", methods_str)
873        })
874        .collect();
875
876    format!(
877        "Consider splitting {} into {} classes: {}",
878        class_name,
879        components.len(),
880        parts.join(" + ")
881    )
882}
883
884/// Compute summary statistics for a set of class cohesion results.
885fn compute_summary(classes: &[ClassCohesion]) -> CohesionSummary {
886    let total = classes.len() as u32;
887    if total == 0 {
888        return CohesionSummary::default();
889    }
890
891    let cohesive = classes
892        .iter()
893        .filter(|c| c.verdict == CohesionVerdict::Cohesive)
894        .count() as u32;
895
896    let split_candidates = total - cohesive;
897
898    let avg_lcom4 = classes.iter().map(|c| c.lcom4 as f64).sum::<f64>() / total as f64;
899
900    CohesionSummary {
901        total_classes: total,
902        cohesive,
903        split_candidates,
904        avg_lcom4: (avg_lcom4 * 100.0).round() / 100.0, // Round to 2 decimal places
905    }
906}
907
908/// Get text content of a node.
909fn get_node_text<'a>(node: Node<'a>, source: &'a [u8]) -> &'a str {
910    let start = node.start_byte();
911    let end = node.end_byte();
912    if end <= source.len() {
913        std::str::from_utf8(&source[start..end]).unwrap_or("")
914    } else {
915        ""
916    }
917}
918
919// =============================================================================
920// Text Formatting
921// =============================================================================
922
923/// Format a cohesion report as human-readable text.
924///
925/// Shows split candidate classes sorted worst-first (highest LCOM4), with
926/// color-coded severity, path stripping, component details, and split suggestions.
927/// Top 30 entries shown by default with overflow message.
928///
929/// ```text
930/// Cohesion Analysis (LCOM4)
931///
932/// LCOM4  Methods  Fields  Class                         File
933///     4        8       6  UserManager                   models/user.py:42
934///     |-- Component 1: create, update [db, cache]
935///     |-- Component 2: send_email [mailer]
936///     `-- Suggestion: Split into 4 focused classes
937///     3        6       4  OrderProcessor                services/order.py:15
938///     |-- Component 1: process, submit [queue]
939///     `-- Suggestion: Split into 3 focused classes
940///
941/// Summary: 47 classes, 12 split candidates (25.5%), avg LCOM4: 1.82
942/// ```
943pub fn format_cohesion_text(report: &CohesionReport) -> String {
944    let mut output = String::new();
945
946    let s = &report.summary;
947    output.push_str(&format!(
948        "Cohesion Analysis (LCOM4) ({} classes, {} split candidates)\n\n",
949        s.total_classes, s.split_candidates
950    ));
951
952    // Filter to split candidates only (LCOM4 > 1) and sort worst-first
953    let mut candidates: Vec<&ClassCohesion> = report
954        .classes
955        .iter()
956        .filter(|c| c.verdict == CohesionVerdict::SplitCandidate)
957        .collect();
958    candidates.sort_by(|a, b| b.lcom4.cmp(&a.lcom4));
959
960    if candidates.is_empty() {
961        output.push_str("  No split candidates found.\n\n");
962        output.push_str(&format_cohesion_summary(s));
963        return output;
964    }
965
966    // Compute common path prefix for relative display
967    let paths: Vec<&Path> = candidates
968        .iter()
969        .filter_map(|c| Path::new(c.file_path.as_str()).parent())
970        .collect();
971    let prefix = if paths.is_empty() {
972        std::path::PathBuf::new()
973    } else {
974        common_path_prefix(&paths)
975    };
976
977    // Header
978    output.push_str(&format!(
979        " {:>5}  {:>7}  {:>6}  {:<28}  {}\n",
980        "LCOM4", "Methods", "Fields", "Class", "File"
981    ));
982
983    // Show top 30
984    let limit = candidates.len().min(30);
985    for class in candidates.iter().take(limit) {
986        let rel = strip_prefix_display(Path::new(&class.file_path), &prefix);
987        let lcom4_str = format_lcom4_colored(class.lcom4);
988
989        // Truncate class name to 28 chars
990        let name = if class.class_name.len() > 28 {
991            format!("{}...", &class.class_name[..25])
992        } else {
993            class.class_name.clone()
994        };
995
996        output.push_str(&format!(
997            " {:>5}  {:>7}  {:>6}  {:<28}  {}:{}\n",
998            lcom4_str, class.method_count, class.field_count, name, rel, class.line
999        ));
1000
1001        // Show component details for split candidates
1002        if !class.components.is_empty() {
1003            let comp_count = class.components.len();
1004            for (i, comp) in class.components.iter().enumerate() {
1005                let is_last = i == comp_count - 1 && class.split_suggestion.is_none();
1006                let connector = if is_last { "`--" } else { "|--" };
1007                let methods_str = comp.methods.join(", ");
1008                let fields_str = if comp.fields.is_empty() {
1009                    String::new()
1010                } else {
1011                    format!(" [{}]", comp.fields.join(", "))
1012                };
1013                output.push_str(&format!(
1014                    "     {}  Component {}: {}{}\n",
1015                    connector,
1016                    i + 1,
1017                    methods_str,
1018                    fields_str
1019                ));
1020            }
1021        }
1022
1023        // Show split suggestion
1024        if let Some(ref suggestion) = class.split_suggestion {
1025            output.push_str(&format!("     `--  Suggestion: {}\n", suggestion));
1026        }
1027    }
1028
1029    if candidates.len() > limit {
1030        output.push_str(&format!(
1031            "\n  ... and {} more split candidates\n",
1032            candidates.len() - limit
1033        ));
1034    }
1035
1036    output.push('\n');
1037    output.push_str(&format_cohesion_summary(s));
1038
1039    output
1040}
1041
1042/// Format LCOM4 value with color coding based on severity.
1043fn format_lcom4_colored(lcom4: u32) -> String {
1044    if lcom4 >= 4 {
1045        format!("{}", lcom4).red().bold().to_string()
1046    } else if lcom4 >= 2 {
1047        format!("{}", lcom4).yellow().to_string()
1048    } else {
1049        format!("{}", lcom4).green().to_string()
1050    }
1051}
1052
1053/// Format the cohesion summary line.
1054fn format_cohesion_summary(s: &CohesionSummary) -> String {
1055    let pct = if s.total_classes > 0 {
1056        (s.split_candidates as f64 / s.total_classes as f64) * 100.0
1057    } else {
1058        0.0
1059    };
1060    format!(
1061        "Summary: {} classes, {} split candidates ({:.1}%), avg LCOM4: {:.2}\n",
1062        s.total_classes, s.split_candidates, pct, s.avg_lcom4
1063    )
1064}
1065
1066// =============================================================================
1067// Public Entry Point
1068// =============================================================================
1069
1070/// Run cohesion analysis (for programmatic use).
1071pub fn run(args: CohesionArgs) -> Result<CohesionReport> {
1072    let start = Instant::now();
1073    let timeout = Duration::from_secs(args.timeout);
1074
1075    // Validate path
1076    let canonical_path = if let Some(ref root) = args.project_root {
1077        validate_file_path_in_project(&args.path, root)?
1078    } else {
1079        validate_file_path(&args.path)?
1080    };
1081
1082    // Analyze based on path type
1083    let report = if canonical_path.is_dir() {
1084        analyze_directory(&canonical_path, &args, start, timeout)?
1085    } else {
1086        analyze_single_file(&canonical_path, &args)?
1087    };
1088
1089    Ok(report)
1090}
1091
1092// =============================================================================
1093// Tests
1094// =============================================================================
1095
1096#[cfg(test)]
1097mod tests {
1098    use super::*;
1099
1100    #[test]
1101    fn test_union_find_basic() {
1102        let mut uf = UnionFind::new(5);
1103
1104        // Initially all separate
1105        assert_eq!(uf.find(0), Some(0));
1106        assert_eq!(uf.find(1), Some(1));
1107
1108        // Union 0 and 1
1109        assert!(uf.union(0, 1));
1110        assert_eq!(uf.find(0), uf.find(1));
1111
1112        // Union 2 and 3
1113        assert!(uf.union(2, 3));
1114        assert_eq!(uf.find(2), uf.find(3));
1115
1116        // Different components
1117        assert_ne!(uf.find(0), uf.find(2));
1118
1119        // Union the two components
1120        assert!(uf.union(1, 3));
1121        assert_eq!(uf.find(0), uf.find(3));
1122    }
1123
1124    #[test]
1125    fn test_union_find_path_compression() {
1126        let mut uf = UnionFind::new(10);
1127
1128        // Create a chain: 0 -> 1 -> 2 -> 3 -> 4
1129        for i in 0..4 {
1130            uf.union(i, i + 1);
1131        }
1132
1133        // After find with path compression, all should point to root
1134        let root = uf.find(0).unwrap();
1135        for i in 0..5 {
1136            assert_eq!(uf.find(i), Some(root));
1137        }
1138    }
1139
1140    #[test]
1141    fn test_union_find_count_components() {
1142        let mut uf = UnionFind::new(6);
1143
1144        // Create two components: {0, 1, 2} and {3, 4, 5}
1145        uf.union(0, 1);
1146        uf.union(1, 2);
1147        uf.union(3, 4);
1148        uf.union(4, 5);
1149
1150        assert_eq!(uf.count_components(6), 2);
1151    }
1152
1153    #[test]
1154    fn test_is_dunder() {
1155        assert!(is_dunder("__init__"));
1156        assert!(is_dunder("__str__"));
1157        assert!(is_dunder("__eq__"));
1158        assert!(!is_dunder("_private"));
1159        assert!(!is_dunder("__private"));
1160        assert!(!is_dunder("public__"));
1161        assert!(!is_dunder("normal"));
1162    }
1163
1164    #[test]
1165    fn test_compute_summary() {
1166        let classes = vec![
1167            ClassCohesion {
1168                class_name: "A".to_string(),
1169                file_path: "test.py".to_string(),
1170                line: 1,
1171                lcom4: 1,
1172                method_count: 3,
1173                field_count: 2,
1174                verdict: CohesionVerdict::Cohesive,
1175                split_suggestion: None,
1176                components: vec![],
1177            },
1178            ClassCohesion {
1179                class_name: "B".to_string(),
1180                file_path: "test.py".to_string(),
1181                line: 10,
1182                lcom4: 2,
1183                method_count: 4,
1184                field_count: 3,
1185                verdict: CohesionVerdict::SplitCandidate,
1186                split_suggestion: Some("Split B".to_string()),
1187                components: vec![],
1188            },
1189        ];
1190
1191        let summary = compute_summary(&classes);
1192        assert_eq!(summary.total_classes, 2);
1193        assert_eq!(summary.cohesive, 1);
1194        assert_eq!(summary.split_candidates, 1);
1195        assert!((summary.avg_lcom4 - 1.5).abs() < 0.01);
1196    }
1197
1198    #[test]
1199    fn test_generate_split_suggestion() {
1200        let components = vec![
1201            ComponentInfo {
1202                methods: vec!["method_a".to_string(), "method_b".to_string()],
1203                fields: vec!["field_x".to_string()],
1204            },
1205            ComponentInfo {
1206                methods: vec!["method_c".to_string()],
1207                fields: vec!["field_y".to_string()],
1208            },
1209        ];
1210
1211        let suggestion = generate_split_suggestion("MyClass", &components);
1212        assert!(suggestion.contains("MyClass"));
1213        assert!(suggestion.contains("2 classes"));
1214        assert!(suggestion.contains("method_a"));
1215        assert!(suggestion.contains("method_c"));
1216    }
1217
1218    // =========================================================================
1219    // format_cohesion_text tests
1220    // =========================================================================
1221
1222    /// Helper to build a ClassCohesion for tests.
1223    fn make_class(
1224        name: &str,
1225        location: (&str, u32),
1226        lcom4: u32,
1227        methods: u32,
1228        fields: u32,
1229        components: Vec<ComponentInfo>,
1230        suggestion: Option<&str>,
1231    ) -> ClassCohesion {
1232        let (file, line) = location;
1233        ClassCohesion {
1234            class_name: name.to_string(),
1235            file_path: file.to_string(),
1236            line,
1237            lcom4,
1238            method_count: methods,
1239            field_count: fields,
1240            verdict: CohesionVerdict::from_lcom4(lcom4),
1241            split_suggestion: suggestion.map(|s| s.to_string()),
1242            components,
1243        }
1244    }
1245
1246    #[test]
1247    fn test_format_cohesion_text_sorts_worst_first() {
1248        let report = CohesionReport {
1249            classes: vec![
1250                make_class("Low", ("src/a.py", 1), 2, 3, 2, vec![], None),
1251                make_class("High", ("src/b.py", 5), 5, 8, 6, vec![], None),
1252                make_class("Mid", ("src/c.py", 10), 3, 5, 4, vec![], None),
1253            ],
1254            summary: CohesionSummary {
1255                total_classes: 3,
1256                cohesive: 0,
1257                split_candidates: 3,
1258                avg_lcom4: 3.33,
1259            },
1260        };
1261        let text = format_cohesion_text(&report);
1262        // "High" (LCOM4=5) should appear before "Mid" (3) before "Low" (2)
1263        let high_pos = text.find("High").expect("High not found");
1264        let mid_pos = text.find("Mid").expect("Mid not found");
1265        let low_pos = text.find("Low").expect("Low not found");
1266        assert!(
1267            high_pos < mid_pos,
1268            "High (LCOM4=5) should appear before Mid (LCOM4=3)"
1269        );
1270        assert!(
1271            mid_pos < low_pos,
1272            "Mid (LCOM4=3) should appear before Low (LCOM4=2)"
1273        );
1274    }
1275
1276    #[test]
1277    fn test_format_cohesion_text_filters_cohesive_classes() {
1278        let report = CohesionReport {
1279            classes: vec![
1280                make_class("Cohesive", ("src/a.py", 1), 1, 3, 2, vec![], None),
1281                make_class("NeedsSplit", ("src/b.py", 5), 3, 6, 4, vec![], None),
1282            ],
1283            summary: CohesionSummary {
1284                total_classes: 2,
1285                cohesive: 1,
1286                split_candidates: 1,
1287                avg_lcom4: 2.0,
1288            },
1289        };
1290        let text = format_cohesion_text(&report);
1291        // Cohesive class (LCOM4=1) should NOT appear in the table rows
1292        // but NeedsSplit (LCOM4=3) should appear
1293        assert!(
1294            !text.contains("Cohesive"),
1295            "Cohesive classes should be filtered out"
1296        );
1297        assert!(
1298            text.contains("NeedsSplit"),
1299            "Split candidates should appear"
1300        );
1301    }
1302
1303    #[test]
1304    fn test_format_cohesion_text_limits_to_30() {
1305        // Create 35 split candidates
1306        let classes: Vec<ClassCohesion> = (0..35)
1307            .map(|i| {
1308                make_class(
1309                    &format!("Class{}", i),
1310                    (&format!("src/mod{}.py", i), i + 1),
1311                    2,
1312                    4,
1313                    3,
1314                    vec![],
1315                    None,
1316                )
1317            })
1318            .collect();
1319        let report = CohesionReport {
1320            classes,
1321            summary: CohesionSummary {
1322                total_classes: 35,
1323                cohesive: 0,
1324                split_candidates: 35,
1325                avg_lcom4: 2.0,
1326            },
1327        };
1328        let text = format_cohesion_text(&report);
1329        assert!(
1330            text.contains("and 5 more"),
1331            "Should show overflow message for remaining 5 classes"
1332        );
1333    }
1334
1335    #[test]
1336    fn test_format_cohesion_text_strips_common_path_prefix() {
1337        let report = CohesionReport {
1338            classes: vec![
1339                make_class("A", ("src/models/user.py", 1), 3, 5, 4, vec![], None),
1340                make_class("B", ("src/models/order.py", 10), 2, 4, 3, vec![], None),
1341            ],
1342            summary: CohesionSummary {
1343                total_classes: 2,
1344                cohesive: 0,
1345                split_candidates: 2,
1346                avg_lcom4: 2.5,
1347            },
1348        };
1349        let text = format_cohesion_text(&report);
1350        // The common prefix "src/models/" should be stripped, showing just filenames
1351        assert!(
1352            text.contains("user.py"),
1353            "Should display stripped path: user.py"
1354        );
1355        assert!(
1356            text.contains("order.py"),
1357            "Should display stripped path: order.py"
1358        );
1359        // Full path should not appear
1360        assert!(
1361            !text.contains("src/models/user.py"),
1362            "Full path should be stripped"
1363        );
1364    }
1365
1366    #[test]
1367    fn test_format_cohesion_text_has_header() {
1368        let report = CohesionReport {
1369            classes: vec![make_class("A", ("src/a.py", 1), 2, 3, 2, vec![], None)],
1370            summary: CohesionSummary {
1371                total_classes: 1,
1372                cohesive: 0,
1373                split_candidates: 1,
1374                avg_lcom4: 2.0,
1375            },
1376        };
1377        let text = format_cohesion_text(&report);
1378        assert!(
1379            text.contains("Cohesion Analysis"),
1380            "Should have title header"
1381        );
1382        assert!(
1383            text.contains("LCOM4") && text.contains("Methods") && text.contains("Fields"),
1384            "Should have column headers"
1385        );
1386        assert!(
1387            text.contains("Class") && text.contains("File"),
1388            "Should have Class and File columns"
1389        );
1390    }
1391
1392    #[test]
1393    fn test_format_cohesion_text_summary_line() {
1394        let report = CohesionReport {
1395            classes: vec![],
1396            summary: CohesionSummary {
1397                total_classes: 47,
1398                cohesive: 35,
1399                split_candidates: 12,
1400                avg_lcom4: 1.82,
1401            },
1402        };
1403        let text = format_cohesion_text(&report);
1404        assert!(
1405            text.contains("47 classes"),
1406            "Summary should show total classes"
1407        );
1408        assert!(
1409            text.contains("12 split candidates"),
1410            "Summary should show split candidate count"
1411        );
1412        assert!(text.contains("1.82"), "Summary should show avg LCOM4");
1413    }
1414
1415    #[test]
1416    fn test_format_cohesion_text_shows_components() {
1417        let components = vec![
1418            ComponentInfo {
1419                methods: vec!["create".to_string(), "update".to_string()],
1420                fields: vec!["db".to_string(), "cache".to_string()],
1421            },
1422            ComponentInfo {
1423                methods: vec!["send_email".to_string()],
1424                fields: vec!["mailer".to_string()],
1425            },
1426        ];
1427        let report = CohesionReport {
1428            classes: vec![make_class(
1429                "UserManager",
1430                ("src/user.py", 1),
1431                2,
1432                3,
1433                3,
1434                components,
1435                Some("Split into 2 focused classes"),
1436            )],
1437            summary: CohesionSummary {
1438                total_classes: 1,
1439                cohesive: 0,
1440                split_candidates: 1,
1441                avg_lcom4: 2.0,
1442            },
1443        };
1444        let text = format_cohesion_text(&report);
1445        // Should show component info
1446        assert!(text.contains("Component 1"), "Should show Component 1");
1447        assert!(
1448            text.contains("create") && text.contains("update"),
1449            "Should show methods in component"
1450        );
1451        assert!(
1452            text.contains("db") && text.contains("cache"),
1453            "Should show fields in component"
1454        );
1455        assert!(text.contains("Component 2"), "Should show Component 2");
1456        assert!(
1457            text.contains("send_email"),
1458            "Should show methods in component 2"
1459        );
1460        // Should show suggestion
1461        assert!(
1462            text.contains("Split into 2 focused classes"),
1463            "Should show split suggestion"
1464        );
1465    }
1466
1467    #[test]
1468    fn test_format_cohesion_text_empty_report() {
1469        let report = CohesionReport {
1470            classes: vec![],
1471            summary: CohesionSummary {
1472                total_classes: 0,
1473                cohesive: 0,
1474                split_candidates: 0,
1475                avg_lcom4: 0.0,
1476            },
1477        };
1478        let text = format_cohesion_text(&report);
1479        assert!(
1480            text.contains("No split candidates"),
1481            "Empty report should show 'No split candidates' message"
1482        );
1483    }
1484
1485    #[test]
1486    fn test_format_cohesion_text_all_cohesive() {
1487        let report = CohesionReport {
1488            classes: vec![
1489                make_class("Good1", ("src/a.py", 1), 1, 5, 3, vec![], None),
1490                make_class("Good2", ("src/b.py", 10), 1, 4, 2, vec![], None),
1491            ],
1492            summary: CohesionSummary {
1493                total_classes: 2,
1494                cohesive: 2,
1495                split_candidates: 0,
1496                avg_lcom4: 1.0,
1497            },
1498        };
1499        let text = format_cohesion_text(&report);
1500        // All classes are cohesive, so no table rows should appear
1501        assert!(
1502            text.contains("No split candidates"),
1503            "All-cohesive report should show 'No split candidates'"
1504        );
1505    }
1506
1507    #[test]
1508    fn test_cohesion_args_lang_flag() {
1509        // Verify CohesionArgs has a lang field of type Option<Language>
1510        let args = CohesionArgs {
1511            path: PathBuf::from("src/"),
1512            min_methods: 2,
1513            include_dunder: false,
1514            output_format: OutputFormat::Json,
1515            timeout: 30,
1516            project_root: None,
1517            lang: Some(Language::Rust),
1518        };
1519        assert_eq!(args.lang, Some(Language::Rust));
1520
1521        // Also test None case (auto-detect)
1522        let args_auto = CohesionArgs {
1523            path: PathBuf::from("src/"),
1524            min_methods: 2,
1525            include_dunder: false,
1526            output_format: OutputFormat::Json,
1527            timeout: 30,
1528            project_root: None,
1529            lang: None,
1530        };
1531        assert_eq!(args_auto.lang, None);
1532    }
1533}