use std::collections::{HashMap, HashSet, VecDeque};
use std::path::{Path, PathBuf};
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use tracing::debug;
use tree_sitter::Node;
use crate::ast::AstExtractor;
use crate::callgraph::scanner::{ProjectScanner, ScanConfig};
use crate::lang::LanguageRegistry;
/// Errors returned by the God-class detection entry points.
#[derive(Error, Debug)]
pub enum GodClassError {
/// Scanning the project failed; the payload is the human-readable reason.
#[error("Project scan failed: {0}")]
ScanError(String),
/// A source file could not be parsed; carries the file and a message.
#[error("Parse error in {file}: {message}")]
ParseError { file: String, message: String },
/// An underlying I/O error, converted automatically via `From`.
#[error("IO error: {0}")]
IoError(#[from] std::io::Error),
/// The path handed to the detector does not exist.
#[error("Path not found: {0}")]
NotFound(String),
}
// Default thresholds used by `GodClassConfig::default()`. A metric only
// contributes to the score once it exceeds its threshold.
const DEFAULT_METHOD_THRESHOLD: u32 = 20;
const DEFAULT_ATTRIBUTE_THRESHOLD: u32 = 15;
const DEFAULT_LINE_THRESHOLD: u32 = 500;
const DEFAULT_LCOM_THRESHOLD: u32 = 2;
// Minimum score a class needs before it is reported as a finding.
const DEFAULT_SCORE_THRESHOLD: f64 = 10.0;
// Multipliers applied to the excess over each threshold when scoring.
const METHOD_PENALTY_WEIGHT: f64 = 2.0;
const ATTRIBUTE_PENALTY_WEIGHT: f64 = 1.0;
// Applied per 100 excess lines (the excess is divided by 100 first).
const LINE_PENALTY_WEIGHT: f64 = 1.0;
const LCOM_PENALTY_WEIGHT: f64 = 5.0;
// Applied to complexity above the expected 3 per method.
const COMPLEXITY_PENALTY_WEIGHT: f64 = 0.1;
/// Tunable settings for God-class detection.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GodClassConfig {
/// Method count above which each extra method adds to the score.
pub method_threshold: u32,
/// Attribute count above which each extra attribute adds to the score.
pub attribute_threshold: u32,
/// Class length in lines above which extra lines add to the score.
pub line_threshold: u32,
/// LCOM value above which each extra component adds to the score.
pub lcom_threshold: u32,
/// Minimum score a class must reach to be reported as a finding.
pub score_threshold: f64,
/// Skip classes whose name, bases or decorators look test-related.
pub exclude_tests: bool,
/// Skip classes whose name or bases contain one of `framework_patterns`.
pub exclude_framework: bool,
/// Skip files whose path contains one of `generated_markers`.
pub exclude_generated: bool,
/// Case-sensitive substrings used by the framework exclusion.
pub framework_patterns: Vec<String>,
/// Substrings matched case-insensitively against the file path.
pub generated_markers: Vec<String>,
/// Optional language filter handed to the project scanner.
pub language: Option<String>,
/// Files larger than this many bytes are skipped when scanning a directory.
pub max_file_size: u64,
}
impl Default for GodClassConfig {
    /// Stock configuration: default thresholds, test and generated-code
    /// exclusion enabled, framework exclusion disabled, no language filter
    /// and a 1 MiB file-size cap.
    fn default() -> Self {
        let framework_patterns: Vec<String> = [
            "Controller",
            "View",
            "ViewModel",
            "Activity",
            "Fragment",
            "Component",
            "Service",
        ]
        .iter()
        .map(|pattern| String::from(*pattern))
        .collect();

        let generated_markers: Vec<String> =
            ["generated", "auto_generated", "_pb2", ".gen.", "codegen"]
                .iter()
                .map(|marker| String::from(*marker))
                .collect();

        Self {
            method_threshold: DEFAULT_METHOD_THRESHOLD,
            attribute_threshold: DEFAULT_ATTRIBUTE_THRESHOLD,
            line_threshold: DEFAULT_LINE_THRESHOLD,
            lcom_threshold: DEFAULT_LCOM_THRESHOLD,
            score_threshold: DEFAULT_SCORE_THRESHOLD,
            exclude_tests: true,
            exclude_framework: false,
            exclude_generated: true,
            framework_patterns,
            generated_markers,
            language: None,
            // 1 MiB
            max_file_size: 1024 * 1024,
        }
    }
}
impl GodClassConfig {
    /// Returns the configuration with the reporting score threshold replaced.
    #[must_use]
    pub fn with_threshold(self, threshold: f64) -> Self {
        Self {
            score_threshold: threshold,
            ..self
        }
    }

    /// Returns the configuration restricted to the given language.
    #[must_use]
    pub fn with_language(self, lang: &str) -> Self {
        Self {
            language: Some(String::from(lang)),
            ..self
        }
    }

    /// Returns the configuration with test-class exclusion switched off.
    #[must_use]
    pub fn include_tests(self) -> Self {
        Self {
            exclude_tests: false,
            ..self
        }
    }

    /// Returns the configuration with framework-class exclusion switched off.
    #[must_use]
    pub fn include_framework(self) -> Self {
        Self {
            exclude_framework: false,
            ..self
        }
    }
}
/// Severity bucket of a God-class finding, ordered from least to most severe.
/// Serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum GodClassSeverity {
/// Score below 20.
Low,
/// Score of at least 20 and below 35.
Medium,
/// Score of at least 35 and below 50.
High,
/// Score of 50 or more.
Critical,
}
impl GodClassSeverity {
    /// Maps a score onto a severity bucket: `>= 50` critical, `>= 35` high,
    /// `>= 20` medium, everything else (including NaN) low.
    #[must_use]
    pub fn from_score(score: f64) -> Self {
        match score {
            s if s >= 50.0 => Self::Critical,
            s if s >= 35.0 => Self::High,
            s if s >= 20.0 => Self::Medium,
            _ => Self::Low,
        }
    }

    /// Short human-readable advice for this severity.
    #[must_use]
    pub const fn description(&self) -> &'static str {
        match *self {
            Self::Critical => "Severe issues, refactor immediately",
            Self::High => "Serious issues, strongly recommend refactoring",
            Self::Medium => "Notable issues, should refactor",
            Self::Low => "Minor issues, consider reviewing",
        }
    }

    /// ANSI escape sequence used to colour this severity in terminal output.
    #[must_use]
    pub const fn color_code(&self) -> &'static str {
        match *self {
            Self::Critical => "\x1b[35m",
            Self::High => "\x1b[31m",
            Self::Medium => "\x1b[38;5;208m",
            Self::Low => "\x1b[33m",
        }
    }
}
impl std::fmt::Display for GodClassSeverity {
    /// Writes the lowercase severity name (`low`, `medium`, `high`, `critical`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Low => "low",
            Self::Medium => "medium",
            Self::High => "high",
            Self::Critical => "critical",
        };
        f.write_str(label)
    }
}
/// Raw metrics collected for one class; input to the score calculation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GodClassIndicators {
/// Number of distinct non-static method names.
pub method_count: u32,
/// Number of distinct instance attributes accessed by the analyzed methods.
pub attribute_count: u32,
/// Class length in lines (end line - start line + 1).
pub line_count: u32,
/// Number of connected components in the method cohesion graph.
pub lcom: u32,
/// Count of distinct non-builtin types seen in bases, return types and parameters.
pub coupling: u32,
/// Sum of the estimated complexity of all analyzed methods.
pub complexity_sum: u32,
/// `complexity_sum` divided by `method_count` (0.0 when there are no methods).
pub avg_complexity: f64,
/// Non-static methods not classified as private.
pub public_methods: u32,
/// Non-static methods classified as private by naming convention.
pub private_methods: u32,
}
impl Default for GodClassIndicators {
    /// All-zero metrics, except `lcom`, which starts at 1.
    fn default() -> Self {
        Self {
            // Counts.
            method_count: 0,
            public_methods: 0,
            private_methods: 0,
            attribute_count: 0,
            line_count: 0,
            // Cohesion and coupling.
            lcom: 1,
            coupling: 0,
            // Complexity.
            complexity_sum: 0,
            avg_complexity: 0.0,
        }
    }
}
impl GodClassIndicators {
    /// Computes the weighted God-class score for these indicators.
    ///
    /// Each metric contributes only the amount by which it exceeds its
    /// threshold in `config`: methods, attributes and LCOM per unit, lines
    /// per 100 lines, and complexity relative to an expected 3 per method.
    #[must_use]
    pub fn calculate_score(&self, config: &GodClassConfig) -> f64 {
        // Excess of `value` over `limit`, or 0.0 when the limit is not exceeded.
        let excess = |value: u32, limit: u32| f64::from(value.saturating_sub(limit));

        let method_penalty =
            excess(self.method_count, config.method_threshold) * METHOD_PENALTY_WEIGHT;
        let attribute_penalty =
            excess(self.attribute_count, config.attribute_threshold) * ATTRIBUTE_PENALTY_WEIGHT;
        let line_penalty =
            (excess(self.line_count, config.line_threshold) / 100.0) * LINE_PENALTY_WEIGHT;
        let lcom_penalty = excess(self.lcom, config.lcom_threshold) * LCOM_PENALTY_WEIGHT;

        let expected_complexity = self.method_count * 3;
        let complexity_penalty =
            excess(self.complexity_sum, expected_complexity) * COMPLEXITY_PENALTY_WEIGHT;

        method_penalty + attribute_penalty + line_penalty + lcom_penalty + complexity_penalty
    }
}
/// A proposed extraction: one cohesive group of methods from a God class.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SuggestedClass {
/// Heuristically generated name for the extracted class.
pub name_hint: String,
/// Methods that belong to this group.
pub methods: Vec<String>,
/// Union of the attributes accessed by those methods.
pub attributes: Vec<String>,
/// Fraction of method pairs in the group that share at least one attribute.
pub cohesion: f64,
}
/// A class reported as a God class, with its metrics and score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GodClassFinding {
/// Name of the class.
pub class_name: String,
/// File the class was found in.
pub file: PathBuf,
/// First line of the class.
pub line: usize,
/// Last line of the class (equals `line` when the end is unknown).
pub end_line: usize,
/// Metrics the score was computed from.
pub indicators: GodClassIndicators,
/// Weighted score from `GodClassIndicators::calculate_score`.
pub score: f64,
/// Severity bucket derived from `score`.
pub severity: GodClassSeverity,
/// Proposed ways to split the class; may be empty.
pub suggested_splits: Vec<SuggestedClass>,
/// Per-metric penalties; omitted from serialized output when empty.
#[serde(skip_serializing_if = "HashMap::is_empty")]
pub score_breakdown: HashMap<String, f64>,
/// Why the class was excluded, if it was; omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub exclusion_reason: Option<String>,
}
/// Aggregate statistics over one analysis run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GodClassStats {
/// Number of classes visited (including excluded ones).
pub total_classes: usize,
/// Number of reported findings.
pub god_classes: usize,
/// Number of classes skipped by an exclusion rule.
pub excluded_classes: usize,
/// `god_classes / total_classes * 100`; 0.0 when there are no findings.
pub god_class_percentage: f64,
/// Finding count keyed by the severity's display name.
pub severity_distribution: HashMap<String, usize>,
/// Mean score of the findings; 0.0 when there are none.
pub average_score: f64,
/// Highest score among the findings; 0.0 when there are none.
pub max_score: f64,
/// Number of distinct files containing at least one finding.
pub affected_files: usize,
}
impl GodClassStats {
    /// Aggregates findings into summary statistics.
    ///
    /// `total_analyzed` and `excluded` are passed through unchanged. With no
    /// findings, every derived number is zero and the distribution is empty.
    fn from_findings(findings: &[GodClassFinding], total_analyzed: usize, excluded: usize) -> Self {
        let god_classes = findings.len();

        let mut severity_distribution: HashMap<String, usize> = HashMap::new();
        let mut files_with_findings: HashSet<PathBuf> = HashSet::new();
        let mut total_score = 0.0;
        let mut highest_score = 0.0f64;

        for entry in findings {
            *severity_distribution
                .entry(entry.severity.to_string())
                .or_insert(0) += 1;
            files_with_findings.insert(entry.file.clone());
            total_score += entry.score;
            highest_score = highest_score.max(entry.score);
        }

        // Ratios are only meaningful when at least one finding exists.
        let (god_class_percentage, average_score) = if god_classes == 0 {
            (0.0, 0.0)
        } else {
            (
                (god_classes as f64 / total_analyzed as f64) * 100.0,
                total_score / god_classes as f64,
            )
        };

        Self {
            total_classes: total_analyzed,
            god_classes,
            excluded_classes: excluded,
            god_class_percentage,
            severity_distribution,
            average_score,
            max_score: highest_score,
            affected_files: files_with_findings.len(),
        }
    }
}
/// Complete result of a God-class analysis run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GodClassAnalysis {
/// File or directory that was analyzed.
pub path: PathBuf,
/// Language filter (directory runs) or detected language (single-file runs).
pub language: Option<String>,
/// Configuration the run used.
pub config: GodClassConfig,
/// Reported God classes.
pub findings: Vec<GodClassFinding>,
/// Aggregate statistics over the findings.
pub stats: GodClassStats,
/// Per-file failures; omitted from serialized output when empty.
#[serde(skip_serializing_if = "Vec::is_empty")]
pub errors: Vec<FileError>,
}
/// A non-fatal failure while analyzing a single file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileError {
/// File that could not be analyzed.
pub file: PathBuf,
/// Human-readable description of what went wrong.
pub message: String,
}
/// Convenience entry point: runs detection with the default configuration,
/// optionally overriding the language filter and the score threshold.
///
/// # Errors
/// Propagates any error from [`detect_with_config`].
pub fn detect_god_classes(
    path: impl AsRef<Path>,
    language: Option<&str>,
    threshold: Option<f64>,
) -> Result<GodClassAnalysis, GodClassError> {
    let mut config = GodClassConfig::default();
    if language.is_some() {
        config.language = language.map(String::from);
    }
    config.score_threshold = threshold.unwrap_or(config.score_threshold);
    detect_with_config(path.as_ref(), config)
}
/// Runs God-class detection on a file or directory with an explicit config.
///
/// A file path is analyzed directly. A directory is scanned (honouring the
/// configured language filter), files above `max_file_size` or without
/// readable metadata are skipped, and the rest are analyzed in parallel.
/// Findings are returned sorted by descending score.
///
/// # Errors
/// * `NotFound` when `path` does not exist.
/// * `ScanError` when the path is not valid UTF-8, the scanner fails, or the
///   scan yields no source files.
pub fn detect_with_config(
    path: &Path,
    config: GodClassConfig,
) -> Result<GodClassAnalysis, GodClassError> {
    if !path.exists() {
        return Err(GodClassError::NotFound(path.display().to_string()));
    }
    if path.is_file() {
        return detect_in_file(path, &config);
    }

    let path_str = match path.to_str() {
        Some(text) => text,
        None => {
            return Err(GodClassError::ScanError(
                "Invalid path encoding".to_string(),
            ))
        }
    };
    let scanner =
        ProjectScanner::new(path_str).map_err(|e| GodClassError::ScanError(e.to_string()))?;
    let scan_config = match config.language.as_ref() {
        Some(lang) => ScanConfig::for_language(lang),
        None => ScanConfig::default(),
    };
    let scan_result = scanner
        .scan_with_config(&scan_config)
        .map_err(|e| GodClassError::ScanError(e.to_string()))?;

    if scan_result.files.is_empty() {
        return Err(GodClassError::ScanError(format!(
            "No source files found in {} (filter: {:?})",
            path.display(),
            config.language
        )));
    }
    debug!("Analyzing {} files for God classes", scan_result.files.len());

    let per_file: Vec<(Vec<GodClassFinding>, Vec<FileError>, usize, usize)> = scan_result
        .files
        .par_iter()
        // Files whose metadata cannot be read are skipped, like oversized ones.
        .filter(|f| match std::fs::metadata(f) {
            Ok(meta) => meta.len() <= config.max_file_size,
            Err(_) => false,
        })
        .map(|file| analyze_file_for_god_classes(file, &config))
        .collect();

    let (mut findings, errors, total_analyzed, total_excluded) = per_file.into_iter().fold(
        (Vec::new(), Vec::new(), 0usize, 0usize),
        |(mut found, mut failed, analyzed, excluded), (f, e, a, x)| {
            found.extend(f);
            failed.extend(e);
            (found, failed, analyzed + a, excluded + x)
        },
    );

    // Highest score first; incomparable scores keep their relative order.
    findings.sort_by(|lhs, rhs| match rhs.score.partial_cmp(&lhs.score) {
        Some(ordering) => ordering,
        None => std::cmp::Ordering::Equal,
    });

    let stats = GodClassStats::from_findings(&findings, total_analyzed, total_excluded);
    Ok(GodClassAnalysis {
        path: path.to_path_buf(),
        language: config.language.clone(),
        config,
        findings,
        stats,
        errors,
    })
}
/// Analyzes a single file and wraps the outcome in a `GodClassAnalysis`.
///
/// The reported language is whatever the global registry detects for the
/// file. This function itself never returns `Err`; per-file failures are
/// reported through the `errors` list.
fn detect_in_file(
    file: &Path,
    config: &GodClassConfig,
) -> Result<GodClassAnalysis, GodClassError> {
    let (findings, errors, analyzed, excluded) = analyze_file_for_god_classes(file, config);
    let language = LanguageRegistry::global()
        .detect_language(file)
        .map(|detected| detected.name().to_string());
    let stats = GodClassStats::from_findings(&findings, analyzed, excluded);

    Ok(GodClassAnalysis {
        path: file.to_path_buf(),
        language,
        config: config.clone(),
        findings,
        stats,
        errors,
    })
}
fn analyze_file_for_god_classes(
file: &Path,
config: &GodClassConfig,
) -> (Vec<GodClassFinding>, Vec<FileError>, usize, usize) {
let mut findings = Vec::new();
let mut errors = Vec::new();
let mut total_analyzed = 0usize;
let mut total_excluded = 0usize;
if config.exclude_generated {
let path_str = file.to_string_lossy().to_lowercase();
for marker in &config.generated_markers {
if path_str.contains(&marker.to_lowercase()) {
return (findings, errors, 0, 0);
}
}
}
let module = match AstExtractor::extract_file(file) {
Ok(m) => m,
Err(e) => {
errors.push(FileError {
file: file.to_path_buf(),
message: format!("Failed to parse file: {}", e),
});
return (findings, errors, total_analyzed, total_excluded);
}
};
let source = match std::fs::read(file) {
Ok(s) => s,
Err(e) => {
errors.push(FileError {
file: file.to_path_buf(),
message: format!("Failed to read file: {}", e),
});
return (findings, errors, total_analyzed, total_excluded);
}
};
let language = &module.language;
let registry = LanguageRegistry::global();
let lang_impl = match registry.detect_language(file) {
Some(l) => l,
None => {
return (findings, errors, total_analyzed, total_excluded);
}
};
let mut parser = match lang_impl.parser() {
Ok(p) => p,
Err(e) => {
errors.push(FileError {
file: file.to_path_buf(),
message: format!("Parser error: {}", e),
});
return (findings, errors, total_analyzed, total_excluded);
}
};
let tree = match parser.parse(&source, None) {
Some(t) => t,
None => {
errors.push(FileError {
file: file.to_path_buf(),
message: "Failed to parse file".to_string(),
});
return (findings, errors, total_analyzed, total_excluded);
}
};
for class in &module.classes {
total_analyzed += 1;
let exclusion_reason = check_exclusion(class, config, language);
if exclusion_reason.is_some() {
total_excluded += 1;
continue;
}
if let Some(finding) = analyze_class_for_god_class(
file,
class,
&tree,
&source,
language,
config,
) {
if finding.score >= config.score_threshold {
findings.push(finding);
}
}
for inner in &class.inner_classes {
total_analyzed += 1;
let exclusion_reason = check_exclusion(inner, config, language);
if exclusion_reason.is_some() {
total_excluded += 1;
continue;
}
if let Some(finding) = analyze_class_for_god_class(
file,
inner,
&tree,
&source,
language,
config,
) {
if finding.score >= config.score_threshold {
findings.push(finding);
}
}
}
}
(findings, errors, total_analyzed, total_excluded)
}
/// Decides whether a class should be skipped, returning the reason if so.
///
/// With `exclude_tests`, a class is skipped when its lowercase name contains
/// `test`, `spec` or `mock`, a base class contains `testcase`/`unittest`
/// (case-insensitive), or a decorator contains `test`. With
/// `exclude_framework`, it is skipped when its name or a base contains one
/// of the configured patterns (case-sensitive). `_language` is unused.
fn check_exclusion(
    class: &crate::ast::types::ClassInfo,
    config: &GodClassConfig,
    _language: &str,
) -> Option<String> {
    let name = &class.name;

    if config.exclude_tests {
        let lowered = name.to_lowercase();
        let test_like_name = ["test", "spec", "mock"]
            .iter()
            .any(|needle| lowered.contains(*needle));
        let test_like_base = class.bases.iter().any(|b| {
            let b_lower = b.to_lowercase();
            b_lower.contains("testcase") || b_lower.contains("unittest")
        });
        let test_like_decorator = class.decorators.iter().any(|d| d.contains("test"));

        if test_like_name || test_like_base || test_like_decorator {
            return Some("Test class".to_string());
        }
    }

    if !config.exclude_framework {
        return None;
    }

    config
        .framework_patterns
        .iter()
        .find(|pattern| {
            name.contains(pattern.as_str())
                || class.bases.iter().any(|b| b.contains(pattern.as_str()))
        })
        .map(|pattern| format!("Framework class ({})", pattern))
}
fn analyze_class_for_god_class(
file: &Path,
class: &crate::ast::types::ClassInfo,
tree: &tree_sitter::Tree,
source: &[u8],
language: &str,
config: &GodClassConfig,
) -> Option<GodClassFinding> {
if class.methods.len() < 3 {
return None;
}
let line_count = class.end_line_number
.unwrap_or(class.line_number)
.saturating_sub(class.line_number) + 1;
let mut method_attributes: HashMap<String, HashSet<String>> = HashMap::new();
let mut all_attributes: HashSet<String> = HashSet::new();
let mut method_calls: HashMap<String, HashSet<String>> = HashMap::new();
let mut complexity_sum = 0u32;
let mut public_methods = 0u32;
let mut private_methods = 0u32;
let method_names: HashSet<String> = class.methods
.iter()
.filter(|m| !is_static_method(m))
.map(|m| m.name.clone())
.collect();
let root = tree.root_node();
if let Some(class_node) = find_class_node(root, &class.name, class.line_number) {
for method in &class.methods {
if is_static_method(method) {
continue;
}
if is_private_method(&method.name, language) {
private_methods += 1;
} else {
public_methods += 1;
}
if let Some(method_node) = find_method_node(class_node, &method.name, method.line_number, language) {
let attrs = extract_attribute_accesses(method_node, source, language);
all_attributes.extend(attrs.clone());
method_attributes.insert(method.name.clone(), attrs);
let calls = extract_method_calls_internal(method_node, source, language, &method_names);
method_calls.insert(method.name.clone(), calls);
let complexity = estimate_complexity(method_node, language);
complexity_sum += complexity;
}
}
}
let method_count = method_names.len() as u32;
let attribute_count = all_attributes.len() as u32;
let lcom = calculate_lcom(&method_attributes, &method_calls);
let coupling = estimate_coupling(class);
let avg_complexity = if method_count > 0 {
complexity_sum as f64 / method_count as f64
} else {
0.0
};
let indicators = GodClassIndicators {
method_count,
attribute_count,
line_count: line_count as u32,
lcom,
coupling,
complexity_sum,
avg_complexity,
public_methods,
private_methods,
};
let score = indicators.calculate_score(config);
let mut score_breakdown = HashMap::new();
if indicators.method_count > config.method_threshold {
let penalty = f64::from(indicators.method_count - config.method_threshold) * METHOD_PENALTY_WEIGHT;
score_breakdown.insert("methods".to_string(), penalty);
}
if indicators.attribute_count > config.attribute_threshold {
let penalty = f64::from(indicators.attribute_count - config.attribute_threshold) * ATTRIBUTE_PENALTY_WEIGHT;
score_breakdown.insert("attributes".to_string(), penalty);
}
if indicators.line_count > config.line_threshold {
let penalty = (f64::from(indicators.line_count - config.line_threshold) / 100.0) * LINE_PENALTY_WEIGHT;
score_breakdown.insert("lines".to_string(), penalty);
}
if indicators.lcom > config.lcom_threshold {
let penalty = f64::from(indicators.lcom - config.lcom_threshold) * LCOM_PENALTY_WEIGHT;
score_breakdown.insert("lcom".to_string(), penalty);
}
let suggested_splits = suggest_class_splits(&method_attributes, &method_calls);
let severity = GodClassSeverity::from_score(score);
Some(GodClassFinding {
class_name: class.name.clone(),
file: file.to_path_buf(),
line: class.line_number,
end_line: class.end_line_number.unwrap_or(class.line_number),
indicators,
score,
severity,
suggested_splits,
score_breakdown,
exclusion_reason: None,
})
}
/// Returns `true` when any decorator of the method contains `static`
/// (which covers `staticmethod` and `@staticmethod` as well).
fn is_static_method(method: &crate::ast::types::FunctionInfo) -> bool {
    method
        .decorators
        .iter()
        .any(|decorator| decorator.contains("static"))
}
/// Classifies a method as private from its name, per language convention.
///
/// * Python: a leading underscore marks the method private, including
///   name-mangled `__name` methods; dunder methods (`__init__`) are public.
/// * JavaScript/TypeScript: a leading `#` or `_`.
/// * Rust, Java, Kotlin, C#: visibility is not encoded in the name, so this
///   always returns `false`.
/// * Any other language: a leading underscore.
fn is_private_method(name: &str, language: &str) -> bool {
    match language {
        "python" => {
            // Only names that both start and end with `__` are special methods.
            let is_dunder = name.starts_with("__") && name.ends_with("__");
            name.starts_with('_') && !is_dunder
        }
        "typescript" | "javascript" | "tsx" | "jsx" => {
            name.starts_with('#') || name.starts_with('_')
        }
        "rust" | "java" | "kotlin" | "csharp" => false,
        _ => name.starts_with('_'),
    }
}
/// Collects the names of instance attributes accessed anywhere below `node`.
fn extract_attribute_accesses(
    node: Node,
    source: &[u8],
    language: &str,
) -> HashSet<String> {
    let mut found = HashSet::new();
    extract_attrs_recursive(node, source, language, &mut found);
    found
}
/// Walks the subtree rooted at `node` and records every attribute accessed
/// through the receiver (`self`/`this`, or a short identifier in Go).
///
/// Python skips names that both start and end with `__`. Go has no receiver
/// keyword, so any identifier operand of at most three bytes is accepted.
fn extract_attrs_recursive(
    node: Node,
    source: &[u8],
    language: &str,
    attributes: &mut HashSet<String>,
) {
    /// Source text of the child stored under `field`, if that child exists.
    fn child_text<'a>(node: Node<'a>, field: &str, source: &'a [u8]) -> Option<&'a str> {
        node.child_by_field_name(field)
            .map(|child| node_text(child, source))
    }

    let kind = node.kind();
    let accessed: Option<&str> = match language {
        "python"
            if kind == "attribute" && child_text(node, "object", source) == Some("self") =>
        {
            child_text(node, "attribute", source)
                .filter(|name| !name.starts_with("__") || !name.ends_with("__"))
        }
        "typescript" | "javascript" | "tsx" | "jsx"
            if kind == "member_expression"
                && child_text(node, "object", source) == Some("this") =>
        {
            child_text(node, "property", source)
        }
        "rust"
            if kind == "field_expression"
                && child_text(node, "value", source) == Some("self") =>
        {
            child_text(node, "field", source)
        }
        "go" if kind == "selector_expression" => match node.child_by_field_name("operand") {
            Some(operand)
                if operand.kind() == "identifier" && node_text(operand, source).len() <= 3 =>
            {
                child_text(node, "field", source)
            }
            _ => None,
        },
        "java" | "kotlin" | "csharp"
            if (kind == "field_access" || kind == "member_access_expression")
                && child_text(node, "object", source) == Some("this") =>
        {
            child_text(node, "field", source).or_else(|| child_text(node, "name", source))
        }
        "cpp" | "c"
            if kind == "field_expression"
                && child_text(node, "argument", source) == Some("this") =>
        {
            child_text(node, "field", source)
        }
        _ => None,
    };

    if let Some(name) = accessed {
        attributes.insert(name.to_string());
    }

    let mut walker = node.walk();
    for child in node.children(&mut walker) {
        extract_attrs_recursive(child, source, language, attributes);
    }
}
/// Collects the names of sibling methods (members of `class_methods`) that
/// are called on the receiver anywhere below `node`.
fn extract_method_calls_internal(
    node: Node,
    source: &[u8],
    language: &str,
    class_methods: &HashSet<String>,
) -> HashSet<String> {
    let mut invoked = HashSet::new();
    extract_calls_recursive(node, source, language, class_methods, &mut invoked);
    invoked
}
/// Walks the subtree rooted at `node` and records every call of the form
/// `<receiver>.<method>(...)` whose method name is in `class_methods`.
///
/// Only Python, JavaScript/TypeScript and Rust are handled; for any other
/// language the walk records nothing.
fn extract_calls_recursive(
    node: Node,
    source: &[u8],
    language: &str,
    class_methods: &HashSet<String>,
    calls: &mut HashSet<String>,
) {
    // (call node kind, callee node kind, receiver field, receiver keyword, member field)
    let shape: Option<(&str, &str, &str, &str, &str)> = match language {
        "python" => Some(("call", "attribute", "object", "self", "attribute")),
        "typescript" | "javascript" | "tsx" | "jsx" => Some((
            "call_expression",
            "member_expression",
            "object",
            "this",
            "property",
        )),
        "rust" => Some((
            "call_expression",
            "field_expression",
            "value",
            "self",
            "field",
        )),
        _ => None,
    };

    if let Some((call_kind, callee_kind, receiver_field, receiver, member_field)) = shape {
        if node.kind() == call_kind {
            let member = node
                .child_by_field_name("function")
                .filter(|callee| callee.kind() == callee_kind)
                .filter(|callee| {
                    callee
                        .child_by_field_name(receiver_field)
                        .map_or(false, |target| node_text(target, source) == receiver)
                })
                .and_then(|callee| callee.child_by_field_name(member_field));

            if let Some(member) = member {
                let method_name = node_text(member, source);
                if class_methods.contains(method_name) {
                    calls.insert(method_name.to_string());
                }
            }
        }
    }

    let mut walker = node.walk();
    for child in node.children(&mut walker) {
        extract_calls_recursive(child, source, language, class_methods, calls);
    }
}
/// Computes LCOM as the number of connected components in the method graph.
///
/// Nodes are the keys of `method_attributes`. Two methods are connected when
/// they share at least one attribute, or when one calls the other according
/// to `method_calls`. Callers and callees that are not keys of
/// `method_attributes` are ignored. Returns 0 when there are no methods.
fn calculate_lcom(
    method_attributes: &HashMap<String, HashSet<String>>,
    method_calls: &HashMap<String, HashSet<String>>,
) -> u32 {
    if method_attributes.is_empty() {
        return 0;
    }

    let entries: Vec<(&String, &HashSet<String>)> = method_attributes.iter().collect();
    // Name -> node index, so call edges are resolved with a hash lookup
    // instead of scanning the method list for every caller and callee.
    let index_of: HashMap<&str, usize> = entries
        .iter()
        .enumerate()
        .map(|(index, (name, _))| (name.as_str(), index))
        .collect();

    let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); entries.len()];

    // Edges between methods that touch a common attribute.
    for (i, (_, attrs_a)) in entries.iter().enumerate() {
        for (j, (_, attrs_b)) in entries.iter().enumerate().skip(i + 1) {
            if !attrs_a.is_disjoint(attrs_b) {
                adjacency[i].push(j);
                adjacency[j].push(i);
            }
        }
    }

    // Edges between methods that call each other.
    for (caller, callees) in method_calls {
        if let Some(&from) = index_of.get(caller.as_str()) {
            for callee in callees {
                if let Some(&to) = index_of.get(callee.as_str()) {
                    adjacency[from].push(to);
                    adjacency[to].push(from);
                }
            }
        }
    }

    // Count components with an iterative depth-first traversal.
    let mut visited = vec![false; entries.len()];
    let mut pending: Vec<usize> = Vec::new();
    let mut components = 0u32;
    for start in 0..entries.len() {
        if visited[start] {
            continue;
        }
        components += 1;
        visited[start] = true;
        pending.push(start);
        while let Some(current) = pending.pop() {
            for &next in &adjacency[current] {
                if !visited[next] {
                    visited[next] = true;
                    pending.push(next);
                }
            }
        }
    }
    components
}
/// Estimates coupling as the number of distinct non-builtin type names that
/// appear as base classes, method return types or parameter annotations.
///
/// A parameter's type is everything after the first `:` in its text, so
/// path-qualified types such as `foo::Bar` are kept whole rather than cut at
/// the next colon. Parameters with no annotation, or an empty one, are
/// ignored.
fn estimate_coupling(class: &crate::ast::types::ClassInfo) -> u32 {
    let mut unique_types: HashSet<String> = HashSet::new();

    for base in &class.bases {
        if !is_builtin_type(base) {
            unique_types.insert(base.clone());
        }
    }

    for method in &class.methods {
        if let Some(ref return_type) = method.return_type {
            if !is_builtin_type(return_type) {
                unique_types.insert(return_type.clone());
            }
        }
        for param in &method.params {
            // Split only at the first colon: the type itself may contain `:`.
            if let Some(type_part) = param.splitn(2, ':').nth(1) {
                let type_name = type_part.trim();
                if !type_name.is_empty() && !is_builtin_type(type_name) {
                    unique_types.insert(type_name.to_string());
                }
            }
        }
    }

    unique_types.len() as u32
}
/// Returns `true` when the leading identifier of `type_name` is a builtin or
/// standard-library type.
///
/// Only the leading identifier is compared, so generic or composite spellings
/// (`Vec<Foo>`, `list[int]`, `string | null`) still count as builtin, while
/// unrelated names that merely share a prefix (`StringBuilder`, `ResultSet`,
/// `stream`) do not. Trailing digits are ignored so sized numeric types such
/// as `int64` or `float32` match their base name.
fn is_builtin_type(type_name: &str) -> bool {
    const BUILTINS: &[&str] = &[
        "int", "str", "float", "bool", "list", "dict", "set", "tuple",
        "None", "any", "Any", "void", "number", "string", "boolean",
        "i32", "i64", "u32", "u64", "f32", "f64", "usize", "isize",
        "String", "Vec", "Option", "Result",
        // Previously matched only because it starts with "Option".
        "Optional",
    ];

    let trimmed = type_name.trim_start();
    let ident_len = trimmed
        .find(|c: char| !(c.is_alphanumeric() || c == '_'))
        .unwrap_or(trimmed.len());
    let ident = &trimmed[..ident_len];
    if ident.is_empty() {
        return false;
    }

    let base = ident.trim_end_matches(|c: char| c.is_ascii_digit());
    BUILTINS
        .iter()
        .any(|builtin| *builtin == ident || *builtin == base)
}
/// Estimates cyclomatic complexity of the subtree at `node`: 1 plus the
/// number of nodes whose kind is a decision point for `language`.
fn estimate_complexity(node: Node, language: &str) -> u32 {
    let decision_kinds: &[&str] = match language {
        "python" => &[
            "if_statement",
            "elif_clause",
            "while_statement",
            "for_statement",
            "except_clause",
            "with_statement",
            "and",
            "or",
            "match_statement",
            "case_clause",
            "list_comprehension",
            "dictionary_comprehension",
        ],
        "typescript" | "javascript" | "tsx" | "jsx" => &[
            "if_statement",
            "while_statement",
            "for_statement",
            "for_in_statement",
            "switch_case",
            "catch_clause",
            "ternary_expression",
            "&&",
            "||",
            "optional_chain_expression",
        ],
        "rust" => &[
            "if_expression",
            "while_expression",
            "for_expression",
            "match_arm",
            "&&",
            "||",
            "?",
        ],
        "go" => &[
            "if_statement",
            "for_statement",
            "switch_case",
            "select_case",
            "&&",
            "||",
        ],
        "java" | "kotlin" | "csharp" => &[
            "if_statement",
            "while_statement",
            "for_statement",
            "enhanced_for_statement",
            "switch_case",
            "catch_clause",
            "&&",
            "||",
            "ternary_expression",
        ],
        _ => &["if_statement", "while_statement", "for_statement", "case"],
    };

    // Every function starts with a single path.
    let mut total = 1u32;
    count_decision_points(node, decision_kinds, &mut total);
    total
}
/// Adds one to `complexity` for `node` and for every descendant whose kind
/// is listed in `kinds`.
fn count_decision_points(node: Node, kinds: &[&str], complexity: &mut u32) {
    let kind = node.kind();
    if kinds.iter().any(|candidate| *candidate == kind) {
        *complexity += 1;
    }

    let mut walker = node.walk();
    node.children(&mut walker)
        .for_each(|child| count_decision_points(child, kinds, complexity));
}
/// Proposes how a class could be split, one suggestion per group of
/// connected methods.
///
/// Methods are connected when they share an attribute or call one another.
/// Groups with fewer than two methods are dropped, and no suggestion is made
/// when the class has three or fewer analyzed methods or fewer than two
/// groups remain.
///
/// Output is deterministic: groups are ordered by their alphabetically first
/// method, and each group's methods and attributes are sorted.
fn suggest_class_splits(
    method_attributes: &HashMap<String, HashSet<String>>,
    method_calls: &HashMap<String, HashSet<String>>,
) -> Vec<SuggestedClass> {
    if method_attributes.len() <= 3 {
        return Vec::new();
    }

    // Sorted names give every method a stable node index.
    let mut names: Vec<&String> = method_attributes.keys().collect();
    names.sort_unstable();
    let index_of: HashMap<&str, usize> = names
        .iter()
        .enumerate()
        .map(|(index, name)| (name.as_str(), index))
        .collect();

    let mut adjacency: Vec<Vec<usize>> = vec![Vec::new(); names.len()];
    for i in 0..names.len() {
        for j in (i + 1)..names.len() {
            if !method_attributes[names[i]].is_disjoint(&method_attributes[names[j]]) {
                adjacency[i].push(j);
                adjacency[j].push(i);
            }
        }
    }
    for (caller, callees) in method_calls {
        if let Some(&from) = index_of.get(caller.as_str()) {
            for callee in callees {
                if let Some(&to) = index_of.get(callee.as_str()) {
                    adjacency[from].push(to);
                    adjacency[to].push(from);
                }
            }
        }
    }

    // Breadth-first search per unvisited method; start nodes are visited in
    // name order, so the component order is stable.
    let mut visited = vec![false; names.len()];
    let mut components: Vec<Vec<String>> = Vec::new();
    for start in 0..names.len() {
        if visited[start] {
            continue;
        }
        visited[start] = true;
        let mut queue = VecDeque::new();
        queue.push_back(start);
        let mut members: Vec<String> = Vec::new();
        while let Some(current) = queue.pop_front() {
            members.push(names[current].clone());
            for &next in &adjacency[current] {
                if !visited[next] {
                    visited[next] = true;
                    queue.push_back(next);
                }
            }
        }
        if members.len() >= 2 {
            // Call edges are inserted in hash order; sort to hide that.
            members.sort_unstable();
            components.push(members);
        }
    }

    if components.len() < 2 {
        return Vec::new();
    }

    components
        .into_iter()
        .enumerate()
        .map(|(position, members)| {
            let mut attrs: HashSet<String> = HashSet::new();
            for method in &members {
                if let Some(method_attrs) = method_attributes.get(method) {
                    attrs.extend(method_attrs.iter().cloned());
                }
            }
            let name_hint = generate_name_hint(&members, &attrs, position);

            // Cohesion: share of method pairs with at least one common attribute.
            let total_pairs = members.len() * (members.len() - 1) / 2;
            let mut sharing_pairs = 0usize;
            for (j, first) in members.iter().enumerate() {
                for second in members.iter().skip(j + 1) {
                    if let (Some(a), Some(b)) =
                        (method_attributes.get(first), method_attributes.get(second))
                    {
                        if !a.is_disjoint(b) {
                            sharing_pairs += 1;
                        }
                    }
                }
            }
            let cohesion = if total_pairs > 0 {
                sharing_pairs as f64 / total_pairs as f64
            } else {
                1.0
            };

            let mut attributes: Vec<String> = attrs.into_iter().collect();
            attributes.sort_unstable();

            SuggestedClass {
                name_hint,
                methods: members,
                attributes,
                cohesion,
            }
        })
        .collect()
}
/// Suggests a name for a group of methods.
///
/// 1. If one non-empty `prefix_` (text before the first underscore) is used
///    by at least two methods and at least `methods.len() / 2` of them, the
///    hint is `<Prefix>Handler`. Ties go to the alphabetically first prefix.
/// 2. Otherwise, the alphabetically first attribute longer than three bytes
///    yields `<Attribute>Manager`.
/// 3. Otherwise the hint is `Component<index + 1>`.
///
/// Methods that start with an underscore have no usable prefix and are
/// ignored in step 1. The result does not depend on hash iteration order.
fn generate_name_hint(methods: &[String], attributes: &HashSet<String>, index: usize) -> String {
    let mut prefix_counts: HashMap<&str, usize> = HashMap::new();
    for method in methods {
        if let Some(pos) = method.find('_') {
            // `pos == 0` means a leading underscore: the prefix would be empty.
            if pos > 0 {
                *prefix_counts.entry(&method[..pos]).or_insert(0) += 1;
            }
        }
    }

    // Highest count wins; equal counts resolve to the smallest prefix.
    let dominant = prefix_counts
        .iter()
        .max_by(|a, b| a.1.cmp(b.1).then_with(|| b.0.cmp(a.0)));
    if let Some((prefix, count)) = dominant {
        if *count >= methods.len() / 2 && *count >= 2 {
            return format!("{}Handler", capitalize(prefix));
        }
    }

    let candidate = attributes.iter().filter(|attr| attr.len() > 3).min();
    if let Some(attr) = candidate {
        return format!("{}Manager", capitalize(attr));
    }

    format!("Component{}", index + 1)
}

/// Returns `s` with its first character upper-cased; empty input stays empty.
fn capitalize(s: &str) -> String {
    let mut chars = s.chars();
    match chars.next() {
        None => String::new(),
        Some(first) => first.to_uppercase().collect::<String>() + chars.as_str(),
    }
}
/// Depth-first search for the first class-like node that starts on `line`
/// (1-based). The class name is not consulted; only kind and line are.
fn find_class_node<'a>(node: Node<'a>, _class_name: &str, line: usize) -> Option<Node<'a>> {
    const CLASS_KINDS: [&str; 6] = [
        "class_definition",
        "class_declaration",
        "class",
        "impl_item",
        "struct_item",
        "type_declaration",
    ];

    // tree-sitter rows are 0-based, reported lines are 1-based.
    if CLASS_KINDS.contains(&node.kind()) && node.start_position().row + 1 == line {
        return Some(node);
    }

    let mut walker = node.walk();
    let located = node
        .children(&mut walker)
        .find_map(|child| find_class_node(child, _class_name, line));
    located
}
/// Finds the method node below `class_node` that starts on `line` (1-based),
/// using the node kinds that represent methods in `language`. The method
/// name is not consulted.
fn find_method_node<'a>(
    class_node: Node<'a>,
    _method_name: &str,
    line: usize,
    language: &str,
) -> Option<Node<'a>> {
    let method_kinds: &[&str] = match language {
        "python" | "cpp" | "c" => &["function_definition"],
        "typescript" | "javascript" | "tsx" | "jsx" => {
            &["method_definition", "function_declaration"]
        }
        "rust" => &["function_item"],
        "go" => &["function_declaration", "method_declaration"],
        "java" | "kotlin" | "csharp" => &["method_declaration", "function_declaration"],
        _ => &["function_definition", "method_definition"],
    };
    find_method_recursive(class_node, method_kinds, line)
}
/// Depth-first search for the first node whose kind is in `method_kinds` and
/// whose first line (1-based) equals `line`.
fn find_method_recursive<'a>(
    node: Node<'a>,
    method_kinds: &[&str],
    line: usize,
) -> Option<Node<'a>> {
    let kind_matches = method_kinds.contains(&node.kind());
    if kind_matches && node.start_position().row + 1 == line {
        return Some(node);
    }

    let mut walker = node.walk();
    let located = node
        .children(&mut walker)
        .find_map(|child| find_method_recursive(child, method_kinds, line));
    located
}
/// Returns the source text covered by `node`, or `""` when that byte range
/// is not valid UTF-8.
fn node_text<'a>(node: Node<'a>, source: &'a [u8]) -> &'a str {
    let bytes = &source[node.start_byte()..node.end_byte()];
    match std::str::from_utf8(bytes) {
        Ok(text) => text,
        Err(_) => "",
    }
}
/// Renders an analysis as a human-readable, ANSI-coloured text report.
///
/// The report contains the summary counters, the severity distribution, the
/// average and maximum score (when there are findings), one section per
/// finding, and the list of per-file errors.
///
/// Output is deterministic: severities are listed from most to least severe
/// (unknown keys last, alphabetically) and score-breakdown entries are
/// listed alphabetically, rather than in hash-map iteration order.
#[must_use]
pub fn format_god_class_summary(analysis: &GodClassAnalysis) -> String {
    /// Position of a severity name in the most-to-least-severe ordering.
    fn severity_rank(name: &str) -> usize {
        const ORDER: [&str; 4] = ["critical", "high", "medium", "low"];
        ORDER
            .iter()
            .position(|known| *known == name)
            .unwrap_or(ORDER.len())
    }

    let stats = &analysis.stats;
    let mut output = String::new();

    output.push_str(&format!(
        "God Class Analysis: {}\n",
        analysis.path.display()
    ));
    output.push_str(&"=".repeat(60));
    output.push_str("\n\n");

    output.push_str("Summary:\n");
    output.push_str(&format!("  Total classes analyzed: {}\n", stats.total_classes));
    output.push_str(&format!(
        "  God classes detected: {} ({:.1}%)\n",
        stats.god_classes, stats.god_class_percentage
    ));
    output.push_str(&format!("  Excluded classes: {}\n", stats.excluded_classes));
    output.push_str(&format!("  Affected files: {}\n", stats.affected_files));

    if !stats.severity_distribution.is_empty() {
        output.push_str("\nSeverity Distribution:\n");
        let mut severities: Vec<(&String, &usize)> = stats.severity_distribution.iter().collect();
        severities.sort_by(|a, b| {
            severity_rank(a.0)
                .cmp(&severity_rank(b.0))
                .then_with(|| a.0.cmp(b.0))
        });
        for (severity, count) in severities {
            output.push_str(&format!("  {}: {}\n", severity, count));
        }
    }

    if stats.god_classes > 0 {
        output.push_str(&format!("\nAverage score: {:.1}\n", stats.average_score));
        output.push_str(&format!("Maximum score: {:.1}\n", stats.max_score));
    }
    output.push_str("\n");

    if analysis.findings.is_empty() {
        output.push_str("No God classes detected.\n");
    } else {
        output.push_str(&format!(
            "Findings ({} God classes):\n\n",
            analysis.findings.len()
        ));
        let reset = "\x1b[0m";
        for finding in &analysis.findings {
            let color = finding.severity.color_code();
            output.push_str(&format!(
                "{}{}{} [{}]: {}\n",
                color,
                finding.class_name,
                reset,
                finding.severity,
                finding.file.display()
            ));
            output.push_str(&format!("  Lines: {}-{}\n", finding.line, finding.end_line));
            output.push_str(&format!("  Score: {:.1}\n", finding.score));
            output.push_str(&format!(
                "  Indicators: methods={}, attributes={}, lines={}, LCOM={}, complexity={}\n",
                finding.indicators.method_count,
                finding.indicators.attribute_count,
                finding.indicators.line_count,
                finding.indicators.lcom,
                finding.indicators.complexity_sum
            ));

            if !finding.score_breakdown.is_empty() {
                output.push_str("  Score breakdown:");
                let mut parts: Vec<(&String, &f64)> = finding.score_breakdown.iter().collect();
                parts.sort_by(|a, b| a.0.cmp(b.0));
                for (reason, penalty) in parts {
                    output.push_str(&format!(" {}={:.1}", reason, penalty));
                }
                output.push_str("\n");
            }

            if !finding.suggested_splits.is_empty() {
                output.push_str("  Suggested splits:\n");
                for split in &finding.suggested_splits {
                    output.push_str(&format!(
                        "    -> {} ({} methods, {:.0}% cohesion): {:?}\n",
                        split.name_hint,
                        split.methods.len(),
                        split.cohesion * 100.0,
                        split.methods
                    ));
                }
            }
            output.push_str("\n");
        }
    }

    if !analysis.errors.is_empty() {
        output.push_str(&format!("\nErrors ({} files):\n", analysis.errors.len()));
        for error in &analysis.errors {
            output.push_str(&format!("  {}: {}\n", error.file.display(), error.message));
        }
    }

    output
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Writes `content` into a new temporary file whose name ends with
    /// `extension` and returns the handle (the file lives as long as it does).
    fn create_temp_file(content: &str, extension: &str) -> NamedTempFile {
        let mut file = tempfile::Builder::new()
            .suffix(extension)
            .tempfile()
            .expect("Failed to create temp file");
        file.write_all(content.as_bytes())
            .expect("Failed to write to temp file");
        file
    }

    /// Score-to-severity mapping at representative scores.
    #[test]
    fn test_severity_classification() {
        assert_eq!(GodClassSeverity::from_score(5.0), GodClassSeverity::Low);
        assert_eq!(GodClassSeverity::from_score(15.0), GodClassSeverity::Low);
        assert_eq!(GodClassSeverity::from_score(25.0), GodClassSeverity::Medium);
        assert_eq!(GodClassSeverity::from_score(40.0), GodClassSeverity::High);
        assert_eq!(GodClassSeverity::from_score(60.0), GodClassSeverity::Critical);
    }

    /// Indicators under every default threshold score zero; indicators well
    /// above them score high.
    #[test]
    fn test_indicators_score_calculation() {
        let config = GodClassConfig::default();

        let normal = GodClassIndicators {
            method_count: 10,
            attribute_count: 5,
            line_count: 200,
            lcom: 1,
            coupling: 3,
            complexity_sum: 30,
            avg_complexity: 3.0,
            public_methods: 8,
            private_methods: 2,
        };
        assert_eq!(normal.calculate_score(&config), 0.0);

        let god = GodClassIndicators {
            method_count: 30,
            attribute_count: 20,
            line_count: 800,
            lcom: 5,
            coupling: 10,
            complexity_sum: 150,
            avg_complexity: 5.0,
            public_methods: 25,
            private_methods: 5,
        };
        let score = god.calculate_score(&config);
        assert!(score > 40.0, "Expected high score for god class, got {}", score);
    }

    /// The builder-style setters update the corresponding config fields.
    #[test]
    fn test_config_builders() {
        let config = GodClassConfig::default()
            .with_threshold(20.0)
            .with_language("python")
            .include_tests()
            .include_framework();
        assert_eq!(config.score_threshold, 20.0);
        assert_eq!(config.language, Some("python".to_string()));
        assert!(!config.exclude_tests);
        assert!(!config.exclude_framework);
    }

    /// A two-method class must not be reported.
    #[test]
    fn test_small_class_not_detected() {
        // Fixture must be validly indented Python so the methods are parsed
        // as members of the class.
        let source = r#"
class SmallClass:
    def method_a(self):
        self.attr = 1
    def method_b(self):
        self.attr = 2
"#;
        let file = create_temp_file(source, ".py");
        let analysis = detect_god_classes(file.path(), None, Some(10.0))
            .expect("analysis of a small class should succeed");
        assert!(analysis.findings.is_empty(), "Small class should not be flagged");
    }

    /// A generated class with 25 methods must be reported.
    #[test]
    fn test_large_class_detected() {
        let mut source = String::from("class LargeClass:\n");
        for i in 0..25 {
            // Methods are indented under the class and bodies under the
            // method so the generated text is valid Python.
            source.push_str(&format!(
                "    def method_{i}(self):\n        self.attr_{i} = {i}\n        return self.attr_{i}\n\n",
            ));
        }
        let file = create_temp_file(&source, ".py");
        let analysis = detect_god_classes(file.path(), None, Some(5.0))
            .expect("analysis of a large class should succeed");
        assert!(!analysis.findings.is_empty(), "Large class should be flagged as God class");
        let finding = &analysis.findings[0];
        assert_eq!(finding.class_name, "LargeClass");
        assert!(finding.indicators.method_count >= 20);
        assert!(finding.score >= 5.0);
    }

    /// With the default config a test-style class is excluded and counted.
    #[test]
    fn test_test_class_excluded() {
        let source = r#"
class TestUserHandler:
    def test_create_user(self):
        self.user = {}
    def test_delete_user(self):
        self.user = None
    def test_update_user(self):
        self.user = {"updated": True}
    def setUp(self):
        self.data = []
    def tearDown(self):
        self.data = None
"#;
        let file = create_temp_file(source, ".py");
        let config = GodClassConfig::default();
        let analysis = detect_with_config(file.path(), config)
            .expect("analysis of a test class should succeed");
        assert!(analysis.findings.is_empty(), "Test class should be excluded");
        assert!(analysis.stats.excluded_classes > 0);
    }

    /// A class with two disjoint method groups should get split suggestions
    /// when it is reported at all.
    #[test]
    fn test_suggested_splits() {
        let source = r#"
class UserOrderService:
    def get_user(self):
        return self.users
    def update_user(self):
        self.users = []
    def delete_user(self):
        self.users.clear()
    def list_users(self):
        return list(self.users)
    def get_order(self):
        return self.orders
    def create_order(self):
        self.orders.append({})
    def cancel_order(self):
        self.orders.pop()
    def list_orders(self):
        return list(self.orders)
"#;
        let file = create_temp_file(source, ".py");
        let analysis = detect_god_classes(file.path(), None, Some(1.0))
            .expect("analysis of the service class should succeed");
        // Only checked when the class is actually reported; if nothing is
        // flagged this test passes without asserting anything.
        if let Some(finding) = analysis.findings.first() {
            assert!(
                !finding.suggested_splits.is_empty(),
                "Should suggest splits for class with separate concerns"
            );
        }
    }

    /// LCOM counts connected components of methods linked by shared attributes.
    #[test]
    fn test_lcom_calculation() {
        let mut method_attrs1: HashMap<String, HashSet<String>> = HashMap::new();
        method_attrs1.insert("m1".to_string(), ["attr".to_string()].into());
        method_attrs1.insert("m2".to_string(), ["attr".to_string()].into());
        method_attrs1.insert("m3".to_string(), ["attr".to_string()].into());
        let calls1: HashMap<String, HashSet<String>> = HashMap::new();
        let lcom1 = calculate_lcom(&method_attrs1, &calls1);
        assert_eq!(lcom1, 1, "All methods sharing attribute should be 1 component");

        let mut method_attrs2: HashMap<String, HashSet<String>> = HashMap::new();
        method_attrs2.insert("m1".to_string(), ["a".to_string()].into());
        method_attrs2.insert("m2".to_string(), ["b".to_string()].into());
        method_attrs2.insert("m3".to_string(), ["c".to_string()].into());
        let lcom2 = calculate_lcom(&method_attrs2, &calls1);
        assert_eq!(lcom2, 3, "Methods not sharing attributes should be separate components");
    }

    /// A missing path is reported as `GodClassError::NotFound`.
    #[test]
    fn test_nonexistent_path() {
        let result = detect_god_classes("/nonexistent/path/file.py", None, None);
        assert!(matches!(result, Err(GodClassError::NotFound(_))));
    }

    /// Aggregate statistics derived from two findings in two files.
    #[test]
    fn test_stats_calculation() {
        let findings = vec![
            GodClassFinding {
                class_name: "A".to_string(),
                file: PathBuf::from("a.py"),
                line: 1,
                end_line: 100,
                indicators: GodClassIndicators::default(),
                score: 15.0,
                severity: GodClassSeverity::Low,
                suggested_splits: vec![],
                score_breakdown: HashMap::new(),
                exclusion_reason: None,
            },
            GodClassFinding {
                class_name: "B".to_string(),
                file: PathBuf::from("b.py"),
                line: 1,
                end_line: 200,
                indicators: GodClassIndicators::default(),
                score: 45.0,
                severity: GodClassSeverity::High,
                suggested_splits: vec![],
                score_breakdown: HashMap::new(),
                exclusion_reason: None,
            },
        ];
        let stats = GodClassStats::from_findings(&findings, 10, 2);
        assert_eq!(stats.total_classes, 10);
        assert_eq!(stats.god_classes, 2);
        assert_eq!(stats.excluded_classes, 2);
        assert!((stats.god_class_percentage - 20.0).abs() < 0.1);
        assert_eq!(stats.affected_files, 2);
        assert!((stats.average_score - 30.0).abs() < 0.1);
        assert!((stats.max_score - 45.0).abs() < 0.1);
    }

    /// Primitive and standard-library type names count as built-in; project
    /// class names do not.
    #[test]
    fn test_builtin_type_detection() {
        assert!(is_builtin_type("int"));
        assert!(is_builtin_type("str"));
        assert!(is_builtin_type("String"));
        assert!(is_builtin_type("Vec<T>"));
        assert!(is_builtin_type("Option<i32>"));
        assert!(!is_builtin_type("UserService"));
        assert!(!is_builtin_type("MyCustomClass"));
    }

    /// Per-language naming rules for private methods.
    #[test]
    fn test_private_method_detection() {
        assert!(is_private_method("_helper", "python"));
        assert!(!is_private_method("__init__", "python"));
        assert!(!is_private_method("public_method", "python"));
        assert!(is_private_method("#private", "typescript"));
        assert!(is_private_method("_hidden", "typescript"));
        assert!(!is_private_method("public", "typescript"));
    }
}