use crate::detectors::analysis_context::AnalysisContext;
use crate::detectors::base::{Detector, DetectorConfig};
use crate::detectors::function_context::FunctionRole;
use crate::graph::GraphQueryExt;
use crate::models::{Finding, Severity};
use anyhow::Result;
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use tracing::info;
pub struct LongMethodsDetector {
#[allow(dead_code)] repository_path: PathBuf,
#[allow(dead_code)] config: DetectorConfig,
max_findings: usize,
#[allow(dead_code)] threshold: u32,
}
fn language_line_threshold(ext: &str) -> usize {
match ext {
"py" | "pyi" => 100,
"rs" | "go" | "ts" | "tsx" | "js" | "jsx" | "mjs" => 150,
"java" | "cs" | "c" | "h" | "cpp" | "cc" | "cxx" | "hpp" => 200,
_ => 150,
}
}
impl LongMethodsDetector {
#[allow(dead_code)] pub fn new(repository_path: impl Into<PathBuf>) -> Self {
Self {
repository_path: repository_path.into(),
config: DetectorConfig::new(),
max_findings: 100,
threshold: 80,
}
}
pub fn with_config(repository_path: impl Into<PathBuf>, config: DetectorConfig) -> Self {
use crate::calibrate::MetricKind;
let default_threshold = 80usize;
let adaptive_threshold = config
.adaptive
.warn_usize(MetricKind::FunctionLength, default_threshold);
let threshold = config.get_option_or("max_lines", adaptive_threshold) as u32;
Self {
repository_path: repository_path.into(),
max_findings: 100,
threshold,
config,
}
}
fn is_graph_orchestrator(
&self,
graph: &dyn crate::graph::GraphQuery,
qualified_name: &str,
lines: u32,
complexity: i64,
) -> bool {
let _i = graph.interner();
let callees = graph.get_callees(qualified_name);
let out_degree = callees.len();
if out_degree >= 7 {
let complexity_per_line = complexity as f64 / lines as f64;
complexity_per_line < 0.2
} else {
false
}
}
fn find_callee_clusters(
&self,
graph: &dyn crate::graph::GraphQuery,
qualified_name: &str,
) -> Vec<String> {
let i = graph.interner();
let callees = graph.get_callees(qualified_name);
let mut modules: HashSet<String> = HashSet::new();
for callee in &callees {
if let Some(module) = callee.path(i).rsplit('/').nth(1) {
modules.insert(module.to_string());
}
}
modules.into_iter().take(5).collect()
}
fn complexity_density(complexity: i64, lines: u32) -> f64 {
if lines == 0 {
return 0.0;
}
complexity as f64 / lines as f64
}
}
impl Detector for LongMethodsDetector {
fn name(&self) -> &'static str {
"long-methods"
}
fn description(&self) -> &'static str {
"Detects methods/functions over 80 lines"
}
fn file_extensions(&self) -> &'static [&'static str] {
&[
"py", "js", "ts", "jsx", "tsx", "rb", "java", "go", "rs", "c", "cpp", "cs",
]
}
fn detect(&self, ctx: &AnalysisContext) -> Result<Vec<Finding>> {
let graph = ctx.graph;
let i = graph.interner();
let mut findings = vec![];
for func in graph.get_functions_shared().iter() {
if findings.len() >= self.max_findings {
break;
}
if func.path(i).contains("/detectors/") {
continue;
}
let lines = func.line_end.saturating_sub(func.line_start);
let qn = func.qn(i);
let ext = Path::new(func.path(i))
.extension()
.and_then(|e| e.to_str())
.unwrap_or("");
let lang_base = language_line_threshold(ext);
let threshold = lang_base.max(ctx.threshold(
crate::calibrate::MetricKind::FunctionLength,
lang_base as f64,
) as usize) as u32;
if lines <= threshold {
continue;
}
let complexity = func.complexity_opt().unwrap_or(1);
let is_orchestrator = matches!(ctx.function_role(qn), Some(FunctionRole::Orchestrator))
|| self.is_graph_orchestrator(graph, qn, lines, complexity);
let is_test = ctx.is_test_function(qn);
let callee_clusters = self.find_callee_clusters(graph, qn);
let density = Self::complexity_density(complexity, lines);
let callees = graph.get_callees(qn);
let out_degree = callees.len();
let mut severity = if lines > threshold * 3 {
Severity::High
} else if lines > threshold * 2 {
Severity::Medium
} else {
Severity::Low
};
if density > 0.5 && lines > 100 {
severity = match severity {
Severity::Low => Severity::Medium,
Severity::Medium => Severity::High,
_ => severity,
};
}
if is_orchestrator {
severity = match severity {
Severity::High => Severity::Medium,
_ => Severity::Low,
};
}
if is_test {
severity = Severity::Low;
}
if !ctx.is_reachable(qn) && !ctx.is_public_api(qn) {
severity = match severity {
Severity::High => Severity::Medium,
Severity::Medium => Severity::Low,
_ => severity,
};
}
let mut notes = Vec::new();
if is_orchestrator {
notes.push(format!(
"Orchestrator pattern: calls {} functions (reduced severity)",
out_degree
));
}
if density > 0.3 {
notes.push(format!(
"High complexity density: {:.2} (complexity {} / {} lines)",
density, complexity, lines
));
}
if callee_clusters.len() >= 3 {
notes.push(format!(
"Calls {} different modules - possible split points",
callee_clusters.len()
));
}
if is_test {
notes.push("Test function (severity capped at Low)".to_string());
}
let context_notes = if notes.is_empty() {
String::new()
} else {
format!("\n\n**Graph Analysis:**\n{}", notes.join("\n"))
};
let suggestion = if is_orchestrator {
"This appears to be an orchestrator function (coordinates many calls).\n\
If it must remain long, ensure it:\n\
1. Has clear section comments\n\
2. Handles errors at each step\n\
3. Has a clear flow (consider a state machine for complex flows)"
.to_string()
} else if callee_clusters.len() >= 3 {
format!(
"This function calls {} different modules. Consider extracting:\n{}",
callee_clusters.len(),
callee_clusters
.iter()
.take(3)
.map(|m| format!(" - `handle_{}()` for {} operations", m, m))
.collect::<Vec<_>>()
.join("\n")
)
} else if density > 0.4 {
"High complexity density - this function does too much logic.\n\
1. Extract conditional branches into helper functions\n\
2. Use early returns to reduce nesting\n\
3. Consider the Strategy pattern for varying behaviors"
.to_string()
} else {
"Break into smaller, focused functions.".to_string()
};
findings.push(Finding {
id: String::new(),
detector: "LongMethodsDetector".to_string(),
severity,
title: format!("Long method: {} ({} lines)", func.node_name(i), lines),
description: format!(
"Function '{}' has {} lines (effective threshold: {}).{}",
func.node_name(i),
lines,
threshold,
context_notes
),
affected_files: vec![PathBuf::from(func.path(i))],
line_start: Some(func.line_start),
line_end: Some(func.line_end),
suggested_fix: Some(suggestion),
estimated_effort: Some(if lines > 200 {
"1-2 hours".to_string()
} else {
"30 minutes".to_string()
}),
category: Some("maintainability".to_string()),
cwe_id: None,
why_it_matters: Some(
"Long methods are hard to understand, test, and maintain. \
Each function should do one thing well."
.to_string(),
),
..Default::default()
});
}
info!(
"LongMethodsDetector found {} findings (graph-aware)",
findings.len()
);
Ok(findings)
}
}
impl crate::detectors::RegisteredDetector for LongMethodsDetector {
fn create(init: &crate::detectors::DetectorInit) -> std::sync::Arc<dyn Detector> {
std::sync::Arc::new(Self::with_config(
init.repo_path,
init.config_for("long-methods"),
))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::graph::builder::GraphBuilder;
use crate::graph::{CodeNode, NodeKind};
#[test]
fn test_language_threshold_long_methods() {
assert_eq!(language_line_threshold("rs"), 150);
assert_eq!(language_line_threshold("py"), 100);
assert_eq!(language_line_threshold("java"), 200);
assert_eq!(language_line_threshold("go"), 150);
assert_eq!(language_line_threshold("unknown"), 150);
}
#[test]
fn test_detects_long_method() {
let dir = tempfile::tempdir().expect("should create temp dir");
let mut store = GraphBuilder::new();
let func = CodeNode::function("big_function", "/src/app.py")
.with_qualified_name("app::big_function")
.with_lines(1, 121);
store.add_node(func);
let detector = LongMethodsDetector::new(dir.path());
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![],
);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
!findings.is_empty(),
"Should detect function with 120 lines (threshold 80 for .py files)"
);
assert!(
findings[0].title.contains("big_function"),
"Title should mention function name, got: {}",
findings[0].title
);
}
#[test]
fn test_no_finding_for_short_method() {
let dir = tempfile::tempdir().expect("should create temp dir");
let mut store = GraphBuilder::new();
let func = CodeNode::function("small_func", "/src/app.py")
.with_qualified_name("app::small_func")
.with_lines(1, 21);
store.add_node(func);
let detector = LongMethodsDetector::new(dir.path());
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![],
);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should not flag a 20-line function, but got: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
#[test]
fn test_test_function_severity_capped() {
let dir = tempfile::tempdir().expect("should create temp dir");
let mut store = GraphBuilder::new();
let func = CodeNode::function("test_big_scenario", "/src/tests.py")
.with_qualified_name("tests::test_big_scenario")
.with_lines(1, 251);
store.add_node(func);
let detector = LongMethodsDetector::new(dir.path());
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![],
);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
!findings.is_empty(),
"Should still detect the 250-line function"
);
}
#[test]
fn test_severity_uses_overshoot_ratio() {
let dir = tempfile::tempdir().expect("should create temp dir");
let mut store = GraphBuilder::new();
let func = CodeNode::function("slightly_long", "/src/app.rs")
.with_qualified_name("app::slightly_long")
.with_lines(1, 161);
store.add_node(func);
let detector = LongMethodsDetector::new(dir.path());
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![],
);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert_eq!(findings.len(), 1);
assert_eq!(
findings[0].severity,
Severity::Low,
"Slight overshoot should be Low"
);
}
#[test]
fn test_description_shows_effective_threshold() {
let dir = tempfile::tempdir().expect("should create temp dir");
let mut store = GraphBuilder::new();
let func = CodeNode::function("big_fn", "/src/app.py")
.with_qualified_name("app::big_fn")
.with_lines(1, 121);
store.add_node(func);
let detector = LongMethodsDetector::new(dir.path());
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![],
);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(!findings.is_empty());
assert!(
findings[0].description.contains("effective threshold"),
"Description should show effective threshold, got: {}",
findings[0].description
);
}
#[test]
fn test_python_threshold_is_100() {
let dir = tempfile::tempdir().expect("should create temp dir");
let mut store = GraphBuilder::new();
let func = CodeNode::function("medium_fn", "/src/app.py")
.with_qualified_name("app::medium_fn")
.with_lines(1, 111);
store.add_node(func);
let detector = LongMethodsDetector::new(dir.path());
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![],
);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
!findings.is_empty(),
"110-line Python function should trigger (threshold is 100)"
);
}
#[test]
fn test_java_threshold_is_200() {
let dir = tempfile::tempdir().expect("should create temp dir");
let mut store = GraphBuilder::new();
let func = CodeNode::function("medium_java_fn", "/src/Foo.java")
.with_qualified_name("Foo::medium_java_fn")
.with_lines(1, 181);
store.add_node(func);
let detector = LongMethodsDetector::new(dir.path());
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![],
);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"180-line Java function should not trigger (threshold is 200), but got: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
}