use crate::detectors::base::Detector;
use crate::graph::GraphQueryExt;
use crate::models::{deterministic_finding_id, Finding, Severity};
use anyhow::Result;
use std::path::PathBuf;
use tracing::info;
use super::{CLONE_CALL, HOT_PATH_INDICATOR};
/// Detector that reports `.clone()` calls found in loop / iterator contexts
/// of Rust source files.
pub struct CloneInHotPathDetector {
    /// Repository root handed to [`CloneInHotPathDetector::new`].
    // Stored at construction time but not read by the detection logic in this
    // file, hence the lint exemption.
    #[allow(dead_code)]
    repository_path: PathBuf,
    /// Upper bound on the number of findings a single `detect` run returns.
    max_findings: usize,
}
/// Minimum call fan-in a function needs to be reported; functions below this
/// value are skipped unless they are part of the public API.
const MIN_FAN_IN_FOR_HOT: usize = 2;
/// Minimum number of hot-path `.clone()` hits required to report hits that
/// belong to no known function, and to report a function whose length is at
/// least `SHORT_FUNCTION_LINES`.
const MIN_CLONES_TO_FLAG: usize = 2;
/// Function length (end line minus start line) below which a function is
/// reported even when it has fewer than `MIN_CLONES_TO_FLAG` hits.
const SHORT_FUNCTION_LINES: u32 = 15;
impl CloneInHotPathDetector {
    /// Number of lines above a `.clone()` call that are inspected for an
    /// enclosing loop / iterator construct.
    const CONTEXT_LOOKBACK_LINES: usize = 10;

    /// Creates a detector for the repository rooted at `repository_path`.
    ///
    /// The number of findings per run is capped at 25.
    pub fn new(repository_path: impl Into<PathBuf>) -> Self {
        Self {
            repository_path: repository_path.into(),
            max_findings: 25,
        }
    }

    /// Returns `true` when the line at `line_idx` (0-based) of `content` sits
    /// in a hot-path context.
    ///
    /// `current_line` is the text of that line. The line is considered hot
    /// when it matches `HOT_PATH_INDICATOR` itself, or when one of the up to
    /// [`Self::CONTEXT_LOOKBACK_LINES`] preceding lines matches the indicator
    /// and is still "in scope" for the current line:
    ///
    /// * it opens a block that has not been closed again before `line_idx`, or
    /// * no block closed between it and `line_idx` is pending and the line is
    ///   brace-balanced (e.g. a continuation line of an iterator chain).
    ///
    /// Lines that belong to a block which was already closed above
    /// `line_idx`, including the header line that opened that block, are
    /// ignored, so a loop that ended before the clone does not make the clone
    /// "hot".
    ///
    /// Brace matching is purely textual: braces inside string literals or
    /// comments are counted like any other brace.
    fn is_hot_path_context(content: &str, line_idx: usize, current_line: &str) -> bool {
        if HOT_PATH_INDICATOR.is_match(current_line) {
            return true;
        }

        // Only materialise the look-back window instead of splitting the whole
        // file into a vector on every call.
        let start = line_idx.saturating_sub(Self::CONTEXT_LOOKBACK_LINES);
        let window: Vec<&str> = content
            .lines()
            .skip(start)
            .take(line_idx - start)
            .collect();

        // Number of `}` seen below the line being examined whose matching `{`
        // has not been reached yet while walking upwards.
        let mut pending_closes = 0usize;
        for line in window.into_iter().rev() {
            let closes_before = pending_closes;
            let (closes_after, opens_enclosing_block) =
                Self::scan_braces_backward(line, closes_before);
            pending_closes = closes_after;

            let in_scope = opens_enclosing_block || (closes_before == 0 && closes_after == 0);
            if in_scope && HOT_PATH_INDICATOR.is_match(line) {
                return true;
            }
        }
        false
    }

    /// Walks `line` right-to-left, matching each `{` against the closest
    /// unmatched `}` seen so far (starting with `pending_closes` from the
    /// lines below).
    ///
    /// Returns the updated count of unmatched `}` and whether the line
    /// contains a `{` with no matching `}` below it, i.e. whether it opens a
    /// block that is still open at the line the scan started from.
    fn scan_braces_backward(line: &str, mut pending_closes: usize) -> (usize, bool) {
        let mut opens_enclosing_block = false;
        for ch in line.chars().rev() {
            match ch {
                '}' => pending_closes += 1,
                '{' if pending_closes > 0 => pending_closes -= 1,
                '{' => opens_enclosing_block = true,
                _ => {}
            }
        }
        (pending_closes, opens_enclosing_block)
    }

    /// Returns `true` when `name` looks like a builder, constructor or
    /// conversion function.
    ///
    /// Only the last path segment is inspected (anything before the final
    /// `::` or `.` is dropped). It matches when it is exactly `build`, `new`,
    /// `default` or `clone`, or starts with `with_`, `set_`, `from_`, `into_`
    /// or `to_`.
    fn is_builder_or_constructor_name(name: &str) -> bool {
        const EXACT_NAMES: [&str; 4] = ["build", "new", "default", "clone"];
        const NAME_PREFIXES: [&str; 5] = ["with_", "set_", "from_", "into_", "to_"];

        let bare = name.rsplit("::").next().unwrap_or(name);
        let bare = bare.rsplit('.').next().unwrap_or(bare);

        EXACT_NAMES.contains(&bare)
            || NAME_PREFIXES
                .iter()
                .any(|prefix| bare.starts_with(prefix))
    }
}
impl Detector for CloneInHotPathDetector {
fn name(&self) -> &'static str {
"rust-clone-in-hot-path"
}
fn description(&self) -> &'static str {
"Detects .clone() in loops and iterators in hot code paths"
}
fn requires_graph(&self) -> bool {
false
}
fn file_extensions(&self) -> &'static [&'static str] {
&["rs"]
}
fn detect(
&self,
ctx: &crate::detectors::analysis_context::AnalysisContext,
) -> Result<Vec<Finding>> {
let graph = ctx.graph;
let files = &ctx.as_file_provider();
let mut findings = vec![];
for path in files.files_with_extension("rs") {
if findings.len() >= self.max_findings {
break;
}
let path_str_check = path.to_string_lossy();
if path_str_check.contains("/tests/")
|| path_str_check.contains("_test.")
|| path_str_check.contains(".test.")
|| path_str_check.contains("/test/")
{
continue;
}
let Some(content) = files.content(path) else {
continue;
};
let all_lines: Vec<&str> = content.lines().collect();
let test_context = super::precompute_test_context(&all_lines);
struct CloneHit {
line_num: u32,
}
struct FunctionClones {
qn: String,
func_name: String,
func_loc: u32,
hits: Vec<CloneHit>,
}
let mut func_clones: std::collections::HashMap<u32, FunctionClones> =
std::collections::HashMap::new();
let mut orphan_hits: Vec<CloneHit> = Vec::new();
for (i, line) in all_lines.iter().enumerate() {
if test_context[i] {
continue;
}
let prev_line = if i > 0 { Some(all_lines[i - 1]) } else { None };
if crate::detectors::is_line_suppressed(line, prev_line) {
continue;
}
let trimmed = line.trim();
if trimmed.starts_with("//") {
continue;
}
if !CLONE_CALL.is_match(line) || !Self::is_hot_path_context(&content, i, line) {
continue;
}
let file_str = path.to_string_lossy();
let line_num = (i + 1) as u32;
if let Some(containing_func) = graph.find_function_at(&file_str, line_num) {
let interner = graph.interner();
let qn = containing_func.qn(interner).to_string();
let func_name = containing_func.node_name(interner).to_string();
let func_loc = containing_func
.line_end
.saturating_sub(containing_func.line_start)
.max(1);
let entry = func_clones
.entry(containing_func.line_start)
.or_insert_with(|| FunctionClones {
qn: qn.clone(),
func_name: func_name.clone(),
func_loc,
hits: Vec::new(),
});
entry.hits.push(CloneHit { line_num });
} else {
orphan_hits.push(CloneHit { line_num });
}
}
let file_str = path.to_string_lossy().to_string();
for func in func_clones.values() {
if ctx.is_test_function(&func.qn) {
continue;
}
if !ctx.is_reachable(&func.qn) && !ctx.is_public_api(&func.qn) {
continue;
}
if Self::is_builder_or_constructor_name(&func.func_name) {
continue;
}
let clone_count = func.hits.len();
if clone_count < MIN_CLONES_TO_FLAG && func.func_loc >= SHORT_FUNCTION_LINES {
continue;
}
let fan_in = if let Some(fc) = ctx.functions.get(&func.qn) {
fc.in_degree
} else {
graph.call_fan_in(&func.qn)
};
if fan_in < MIN_FAN_IN_FOR_HOT && !ctx.is_public_api(&func.qn) {
continue;
}
let severity = if ctx.is_infrastructure(&func.qn) {
Severity::Info
} else if clone_count >= 3 {
Severity::Medium
} else {
Severity::Low
};
let first_hit = &func.hits[0];
let last_hit = &func.hits[func.hits.len() - 1];
let description = if clone_count == 1 {
format!(
"`.clone()` in a hot path inside `{}` (fan-in: {fan_in}). \
Consider references, Cow, or Arc.",
func.func_name
)
} else {
format!(
"{clone_count} `.clone()` calls in hot paths inside `{}` (fan-in: {fan_in}). \
Consider references, Cow, or Arc to reduce allocation overhead.",
func.func_name
)
};
findings.push(Finding {
id: deterministic_finding_id(
"CloneInHotPathDetector",
&file_str,
first_hit.line_num,
"clone in hot path",
),
detector: "CloneInHotPathDetector".to_string(),
severity,
title: format!(
".clone() in loop/iterator ({clone_count}x in `{}`)",
func.func_name
),
description,
affected_files: vec![path.to_path_buf()],
line_start: Some(first_hit.line_num),
line_end: Some(last_hit.line_num),
suggested_fix: Some(
"Use references, Cow<str>, or Arc instead of clone.".to_string(),
),
estimated_effort: Some("20 minutes".to_string()),
category: Some("performance".to_string()),
why_it_matters: Some(
"Cloning inside loops multiplies allocation overhead.".to_string(),
),
..Default::default()
});
if findings.len() >= self.max_findings {
break;
}
}
if orphan_hits.len() >= MIN_CLONES_TO_FLAG && findings.len() < self.max_findings {
let first = &orphan_hits[0];
let last = &orphan_hits[orphan_hits.len() - 1];
findings.push(Finding {
id: deterministic_finding_id(
"CloneInHotPathDetector",
&file_str,
first.line_num,
"clone in hot path",
),
detector: "CloneInHotPathDetector".to_string(),
severity: Severity::Low,
title: format!(
".clone() in loop/iterator ({}x at module level)",
orphan_hits.len()
),
description:
"Cloning in a hot path can cause performance issues. Consider references, Cow, or Arc."
.to_string(),
affected_files: vec![path.to_path_buf()],
line_start: Some(first.line_num),
line_end: Some(last.line_num),
suggested_fix: Some(
"Use references, Cow<str>, or Arc instead of clone.".to_string(),
),
estimated_effort: Some("20 minutes".to_string()),
category: Some("performance".to_string()),
why_it_matters: Some(
"Cloning inside loops multiplies allocation overhead.".to_string(),
),
..Default::default()
});
}
}
info!("CloneInHotPathDetector found {} findings", findings.len());
Ok(findings)
}
}
impl super::super::RegisteredDetector for CloneInHotPathDetector {
    /// Builds a shared detector instance rooted at the repository path
    /// carried by `init`.
    fn create(init: &super::super::DetectorInit) -> std::sync::Arc<dyn Detector> {
        let detector = Self::new(init.repo_path);
        std::sync::Arc::new(detector)
    }

    /// Highest analysis tier this detector is declared for.
    fn max_tier() -> crate::models::Tier {
        crate::models::Tier::Deep
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::detectors::analysis_context::AnalysisContext;
    use crate::detectors::base::Detector;
    use crate::graph::builder::GraphBuilder;

    /// Runs a freshly constructed detector over `ctx` and unwraps the result.
    fn run_detector(ctx: &AnalysisContext) -> Vec<crate::models::Finding> {
        CloneInHotPathDetector::new("/mock/repo")
            .detect(ctx)
            .expect("detection should succeed")
    }

    /// Two clones inside one loop must collapse into at most one finding.
    #[test]
    fn test_clone_in_loop_multiple_clones_flagged() {
        let source = "fn process(items: &[Item]) {\n \
                      for item in items {\n \
                      let owned = item.clone();\n \
                      let other = item.name.clone();\n \
                      do_something(owned, other);\n \
                      }\n}\n";
        let graph = GraphBuilder::new().freeze();
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("test.rs", source)]);

        let findings = run_detector(&ctx);

        assert!(
            findings.len() <= 1,
            "expected at most 1 aggregated finding, got {}",
            findings.len()
        );
    }

    /// Clones that live inside a `#[cfg(test)]` module are ignored.
    #[test]
    fn test_clone_in_test_skipped() {
        let source = "#[cfg(test)]\nmod tests {\n \
                      #[test]\n fn test_something() {\n \
                      for item in items {\n \
                      let owned = item.clone();\n \
                      let other = item.name.clone();\n \
                      }\n }\n}\n";
        let graph = GraphBuilder::new().freeze();
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("test.rs", source)]);

        let findings = run_detector(&ctx);

        assert!(
            findings.is_empty(),
            "clones in test code should not be flagged"
        );
    }

    /// A lone clone in a function of more than twenty lines is not reported.
    #[test]
    fn test_single_clone_in_large_function_skipped() {
        let mut source = String::from("fn big_function(items: &[Item]) {\n");
        source.push_str(" for item in items {\n");
        source.push_str(" let owned = item.clone();\n");
        (0..20).for_each(|i| source.push_str(&format!(" let x{i} = {i};\n")));
        source.push_str(" }\n}\n");

        let graph = GraphBuilder::new().freeze();
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("test.rs", &source)]);

        let findings = run_detector(&ctx);

        assert!(
            findings.is_empty(),
            "single clone in large function should not be flagged"
        );
    }

    /// Builder / constructor style names are recognised, ordinary names are not.
    #[test]
    fn test_builder_pattern_name_detection() {
        let builder_like = ["with_timeout", "MyStruct::new", "build", "set_name", "from_str"];
        for name in builder_like {
            assert!(CloneInHotPathDetector::is_builder_or_constructor_name(
                name
            ));
        }

        let ordinary = ["process_items", "detect"];
        for name in ordinary {
            assert!(!CloneInHotPathDetector::is_builder_or_constructor_name(
                name
            ));
        }
    }

    /// Clones outside any loop or iterator must not produce findings.
    #[test]
    fn test_no_hot_path_context_not_flagged() {
        let source = "fn simple() {\n let x = foo.clone();\n let y = bar.clone();\n}\n";
        let graph = GraphBuilder::new().freeze();
        let ctx = AnalysisContext::test_with_mock_files(&graph, vec![("test.rs", source)]);

        let findings = run_detector(&ctx);

        assert!(
            findings.is_empty(),
            "clones outside hot paths should not be flagged"
        );
    }
}