use crate::detectors::base::{Detector, DetectorConfig};
use crate::graph::GraphQueryExt;
use crate::models::{deterministic_finding_id, Finding, Severity};
use anyhow::Result;
use regex::Regex;
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;
use std::sync::LazyLock;
use tracing::info;
static LOOP_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(?i)(for\s+\w+\s+in|\.forEach|\.map\(|\.each|for\s*\(|while\s*\()")
.expect("valid regex")
});
static STRING_CONCAT: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r#"\w+\s*\+=\s*(?:["'`]|f["'])"#).expect("valid regex")
});
static FOR_VAR_PATTERN: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"for\s+(\w+)\s+in").expect("valid regex"));
static CONCAT_VAR_PATTERN: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(\w+)\s*\+=").expect("valid regex"));
pub struct StringConcatLoopDetector {
#[allow(dead_code)] repository_path: PathBuf,
max_findings: usize,
}
impl StringConcatLoopDetector {
crate::detectors::detector_new!(50);
fn find_concat_functions(&self, graph: &dyn crate::graph::GraphQuery) -> HashSet<String> {
let i = graph.interner();
let mut concat_funcs = HashSet::new();
let mut file_lines: std::collections::HashMap<String, Vec<String>> =
std::collections::HashMap::new();
for func in graph.get_functions_shared().iter() {
let lines = file_lines
.entry(func.path(i).to_string())
.or_insert_with(|| {
crate::cache::global_cache()
.content(std::path::Path::new(func.path(i)))
.map(|c| c.lines().map(String::from).collect())
.unwrap_or_default()
});
let start = func.line_start.saturating_sub(1) as usize;
let end = (func.line_end as usize).min(lines.len());
for line in lines.get(start..end).unwrap_or(&[]) {
if STRING_CONCAT.is_match(line) {
concat_funcs.insert(func.qn(i).to_string());
break;
}
}
}
concat_funcs
}
fn get_suggestion(ext: &str) -> String {
match ext {
"py" => "Use list and join:\n\
```python\n\
parts = []\n\
for item in items:\n\
parts.append(str(item))\n\
result = ''.join(parts)\n\
```\n\
Or use a list comprehension:\n\
```python\n\
result = ''.join(str(item) for item in items)\n\
```"
.to_string(),
"java" => "Use StringBuilder:\n\
```java\n\
StringBuilder sb = new StringBuilder();\n\
for (String item : items) {\n\
sb.append(item);\n\
}\n\
String result = sb.toString();\n\
```"
.to_string(),
"js" | "ts" => "Use array and join:\n\
```javascript\n\
const parts = items.map(item => String(item));\n\
const result = parts.join('');\n\
```\n\
Or use template literals with reduce:\n\
```javascript\n\
const result = items.reduce((acc, item) => `${acc}${item}`, '');\n\
```"
.to_string(),
"go" => "Use strings.Builder:\n\
```go\n\
var sb strings.Builder\n\
for _, item := range items {\n\
sb.WriteString(item)\n\
}\n\
result := sb.String()\n\
```"
.to_string(),
_ => "Use a StringBuilder or list.join() approach.".to_string(),
}
}
}
impl Detector for StringConcatLoopDetector {
fn name(&self) -> &'static str {
"string-concat-loop"
}
fn description(&self) -> &'static str {
"Detects string concatenation in loops"
}
fn file_extensions(&self) -> &'static [&'static str] {
&["py", "js", "ts", "jsx", "tsx", "java", "go"]
}
fn detect(
&self,
ctx: &crate::detectors::analysis_context::AnalysisContext,
) -> Result<Vec<Finding>> {
let graph = ctx.graph;
let files = &ctx.as_file_provider();
let i = graph.interner();
let mut findings = vec![];
let concat_funcs = self.find_concat_functions(graph);
for path in files.files_with_extensions(&["py", "js", "ts", "java", "go", "rb", "php"]) {
if findings.len() >= self.max_findings {
break;
}
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
if let Some(content) = files.content(path) {
let is_python = ext == "py";
let mut in_loop = false;
let mut loop_line: usize = 0;
let mut brace_depth = 0;
let mut loop_indent: usize = 0;
let mut loop_line_idx: usize = 0;
let mut _loop_var = String::new();
let all_lines: Vec<&str> = content.lines().collect();
let mut loop_concats: HashMap<String, (usize, u32)> = HashMap::new();
let flush_loop_concats =
|concats: &mut HashMap<String, (usize, u32)>,
findings: &mut Vec<Finding>,
loop_start_line: usize,
file_path: &std::path::Path,
extension: &str| {
let mut entries: Vec<_> = concats.drain().collect();
entries.sort_by(|a, b| a.0.cmp(&b.0));
for (var_name, (first_line, count)) in entries {
if count >= 2 {
let suggestion = Self::get_suggestion(extension);
findings.push(Finding {
id: String::new(),
detector: "StringConcatLoopDetector".to_string(),
severity: Severity::Medium,
title: "String concatenation in loop".to_string(),
description: format!(
"Variable '{}' concatenated {} times inside loop (started line {}).\n\n\
**Performance:** O(n²) time complexity. Each concatenation \
creates a new string and copies all previous characters.\n\n\
For 1000 iterations, this copies ~500,000 characters instead of 1000.",
var_name, count, loop_start_line
),
affected_files: vec![file_path.to_path_buf()],
line_start: Some(first_line as u32),
line_end: Some(first_line as u32),
suggested_fix: Some(suggestion),
estimated_effort: Some("15 minutes".to_string()),
category: Some("performance".to_string()),
cwe_id: None,
why_it_matters: Some(
"String concatenation in loops creates O(n²) time complexity \
due to immutable string copying.".to_string()
),
..Default::default()
});
}
}
};
for (i, line) in all_lines.iter().enumerate() {
if LOOP_PATTERN.is_match(line) {
if in_loop {
flush_loop_concats(
&mut loop_concats,
&mut findings,
loop_line,
path,
ext,
);
}
in_loop = true;
loop_line = i + 1;
loop_line_idx = i;
loop_concats.clear();
if is_python {
loop_indent = line.len() - line.trim_start().len();
} else {
brace_depth = 0;
}
if let Some(caps) = FOR_VAR_PATTERN.captures(line) {
_loop_var = caps
.get(1)
.map(|m| m.as_str().to_string())
.unwrap_or_default();
}
}
if in_loop {
if is_python {
let trimmed = line.trim();
if !trimmed.is_empty() && i > loop_line_idx {
let current_indent = line.len() - line.trim_start().len();
if current_indent <= loop_indent {
flush_loop_concats(
&mut loop_concats,
&mut findings,
loop_line,
path,
ext,
);
in_loop = false;
continue;
}
}
} else {
brace_depth += line.matches('{').count() as i32;
brace_depth -= line.matches('}').count() as i32;
if brace_depth < 0 {
flush_loop_concats(
&mut loop_concats,
&mut findings,
loop_line,
path,
ext,
);
in_loop = false;
continue;
}
}
if STRING_CONCAT.is_match(line) {
let prev_line = if i > 0 { Some(all_lines[i - 1]) } else { None };
if crate::detectors::is_line_suppressed(line, prev_line) {
continue;
}
if let Some(caps) = CONCAT_VAR_PATTERN.captures(line) {
let var_name = caps
.get(1)
.map(|m| m.as_str().to_string())
.unwrap_or_default();
let entry = loop_concats.entry(var_name).or_insert((i + 1, 0));
entry.1 += 1;
}
}
}
}
if in_loop {
flush_loop_concats(&mut loop_concats, &mut findings, loop_line, path, ext);
}
}
}
if !concat_funcs.is_empty() {
let mut graph_file_lines: HashMap<String, Vec<String>> = HashMap::new();
for func in graph.get_functions_shared().iter() {
if findings.len() >= self.max_findings {
break;
}
if func.path(i).ends_with(".rs") {
continue;
}
let lines = graph_file_lines
.entry(func.path(i).to_string())
.or_insert_with(|| {
crate::cache::global_cache()
.content(std::path::Path::new(func.path(i)))
.map(|c| c.lines().map(String::from).collect())
.unwrap_or_default()
});
let start = func.line_start.saturating_sub(1) as usize;
let end = (func.line_end as usize).min(lines.len());
let has_loop = lines
.get(start..end)
.map(|slice| slice.iter().any(|line| LOOP_PATTERN.is_match(line)))
.unwrap_or(false);
if !has_loop {
continue;
}
for callee in graph.get_callees(func.qn(i)) {
if concat_funcs.contains(callee.qn(i)) {
findings.push(Finding {
id: String::new(),
detector: "StringConcatLoopDetector".to_string(),
severity: Severity::Medium,
title: format!("Hidden string concat: {} → {}", func.node_name(i), callee.node_name(i)),
description: format!(
"Function '{}' contains a loop and calls '{}' which does string concatenation.\n\n\
This creates the same O(n²) performance issue across function boundaries.",
func.node_name(i), callee.node_name(i)
),
affected_files: vec![PathBuf::from(func.path(i))],
line_start: Some(func.line_start),
line_end: Some(func.line_end),
suggested_fix: Some(
"Options:\n\
1. Refactor the called function to accept a StringBuilder/list\n\
2. Batch the operations before calling\n\
3. Return parts instead of concatenating inside".to_string()
),
estimated_effort: Some("30 minutes".to_string()),
category: Some("performance".to_string()),
cwe_id: None,
why_it_matters: Some(
"Hidden O(n²) patterns are harder to spot but equally impactful.".to_string()
),
..Default::default()
});
break;
}
}
}
}
info!(
"StringConcatLoopDetector found {} findings (graph-aware)",
findings.len()
);
Ok(findings)
}
}
impl crate::detectors::RegisteredDetector for StringConcatLoopDetector {
fn create(init: &crate::detectors::DetectorInit) -> std::sync::Arc<dyn Detector> {
std::sync::Arc::new(Self::new(init.repo_path))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::graph::builder::GraphBuilder;
#[test]
fn test_detects_string_concat_in_loop() {
let store = GraphBuilder::new().freeze();
let detector = StringConcatLoopDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("builder.py", "def build_output(items):\n result = \"\"\n for item in items:\n result += \"key: \"\n result += \"value\"\n return result\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
!findings.is_empty(),
"Should detect string concatenation in loop (2+ concats to same var). Found: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
#[test]
fn test_no_finding_for_join() {
let store = GraphBuilder::new().freeze();
let detector = StringConcatLoopDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("builder.py", "def build_output(items):\n parts = []\n for item in items:\n parts.append(str(item))\n return ''.join(parts)\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should not flag list.append + join pattern. Found: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
#[test]
fn test_no_finding_for_numeric_accumulation() {
let store = GraphBuilder::new().freeze();
let detector = StringConcatLoopDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("calc.py", "def total_price(items):\n total = 0\n for item in items:\n total += item.price\n return total\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should not flag numeric accumulation (total += item.price). Found: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
#[test]
fn test_no_finding_for_counter_increment() {
let store = GraphBuilder::new().freeze();
let detector = StringConcatLoopDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("count.py", "def count_active(users):\n count = 0\n for user in users:\n count += 1\n return count\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should not flag counter increment (count += 1). Found: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
#[test]
fn test_still_detects_string_literal_concat_in_loop() {
let store = GraphBuilder::new().freeze();
let detector = StringConcatLoopDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("build.py", "def build(items):\n result = \"\"\n for item in items:\n result += \"prefix_\"\n result += \"suffix_\"\n return result\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
!findings.is_empty(),
"Should still detect string literal concat in loop (2+ concats)"
);
}
#[test]
fn test_still_detects_string_concat_with_plus() {
let store = GraphBuilder::new().freeze();
let detector = StringConcatLoopDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("build.py", "def build(items):\n result = \"\"\n for item in items:\n result = result + \"value\"\n return result\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should not detect result = result + 'value' since = x + y pattern was removed"
);
}
#[test]
fn test_no_finding_for_media_iadd() {
let store = GraphBuilder::new().freeze();
let detector = StringConcatLoopDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![("forms.py", "for fs in formsets:\n media += fs.media\n")],
);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should not flag media += fs.media (not string concat). Found: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
#[test]
fn test_no_finding_for_concat_after_loop() {
let store = GraphBuilder::new().freeze();
let detector = StringConcatLoopDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![(
"builder.py",
"for item in items:\n process(item)\n\nresult += \"_suffix\"\n",
)],
);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Concat after loop exits should not be flagged. Found: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
#[test]
fn test_still_detects_string_concat_in_loop() {
let store = GraphBuilder::new().freeze();
let detector = StringConcatLoopDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("slow.py", "result = \"\"\nfor item in items:\n result += \"item: \"\n result += \"value\"\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
!findings.is_empty(),
"Should still detect string concat inside loop (2+ concats)"
);
}
#[test]
fn test_no_finding_for_single_concat_per_iteration() {
let store = GraphBuilder::new().freeze();
let detector = StringConcatLoopDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
&store,
vec![("views.py", "for item in items:\n url += \"/\"\n")],
);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should not flag single concat per iteration. Found: {:?}",
findings.iter().map(|f| &f.title).collect::<Vec<_>>()
);
}
#[test]
fn test_still_detects_multiple_concats_in_loop() {
let store = GraphBuilder::new().freeze();
let detector = StringConcatLoopDetector::new("/mock/repo");
let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(&store, vec![
("builder.py", "for field in fields:\n definition += \" \" + check\n definition += \" \" + suffix\n definition += \" \" + fk\n"),
]);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
!findings.is_empty(),
"Should detect multiple concats to same variable in loop"
);
}
}