use crate::detectors::base::{Detector, DetectorConfig};
use crate::graph::GraphStore;
use crate::models::{deterministic_finding_id, Finding, Severity};
use anyhow::Result;
use regex::Regex;
use std::collections::HashSet;
use std::path::PathBuf;
use std::sync::OnceLock;
use tracing::{debug, info};
/// Cache for the compiled loop-construct regex; initialised on first call to `loop_pattern`.
static LOOP: OnceLock<Regex> = OnceLock::new();
/// Cache for the compiled query-expression regex; initialised on first call to `query_pattern`.
static QUERY: OnceLock<Regex> = OnceLock::new();
/// Cache for the compiled query-function-name regex; initialised on first call to `query_func_pattern`.
static QUERY_FUNC: OnceLock<Regex> = OnceLock::new();
/// Returns the shared regex that recognises a loop construct on a source line.
///
/// The pattern is case-insensitive and matches `for <word> in`, `.forEach`,
/// `.map(` and `.each`. It is compiled once and stored in `LOOP`.
fn loop_pattern() -> &'static Regex {
    const SOURCE: &str = r"(?i)(for\s+\w+\s+in|\.forEach|\.map\(|\.each)";
    LOOP.get_or_init(|| Regex::new(SOURCE).expect("valid regex"))
}
/// Returns the shared regex that recognises a query-like expression on a source line.
///
/// The pattern is case-insensitive and matches calls such as `.get(`, `.find(`,
/// `.filter(`, `.first(`, `.where(`, `.query(`, the word `SELECT` followed by
/// whitespace, `Model.<word>.get` and `await <word>.findOne`. It is compiled
/// once and stored in `QUERY`.
fn query_pattern() -> &'static Regex {
    const SOURCE: &str = r"(?i)(\.get\(|\.find\(|\.filter\(|\.first\(|\.where\(|\.query\(|SELECT\s|Model\.\w+\.get|await\s+\w+\.findOne)";
    QUERY.get_or_init(|| Regex::new(SOURCE).expect("valid regex"))
}
/// Returns the shared regex that recognises function names which look like
/// data-access helpers (`get_…`, `find_…`, `fetch_…`, `load_…`, `query_…`,
/// `select_…`), case-insensitively.
///
/// The verb must either start the name or follow a non-alphanumeric character
/// (for example the `_` in `user_get_profile`). Without that boundary the bare
/// substrings also matched inside unrelated words: `target_`, `widget_`,
/// `budget_` and `forget_` all contain `get_`; `upload_`, `download_` and
/// `reload_` contain `load_`; `deselect_` contains `select_`.
///
/// The regex is compiled once and stored in `QUERY_FUNC`.
fn query_func_pattern() -> &'static Regex {
    QUERY_FUNC.get_or_init(|| {
        Regex::new(r"(?i)(?:^|[^A-Za-z0-9])(?:get|find|fetch|load|query|select)_")
            .expect("valid regex")
    })
}
/// Detector that reports potential N+1 query patterns: query-like expressions
/// found after a loop construct in source files, plus functions that contain a
/// loop and reach a query function through the call graph.
pub struct NPlusOneDetector {
/// Root directory handed to the file walker in `detect`.
repository_path: PathBuf,
/// Limit on the number of findings; `new` sets it to 50.
max_findings: usize,
}
impl NPlusOneDetector {
/// Creates a detector rooted at `repository_path` with a findings limit of 50.
pub fn new(repository_path: impl Into<PathBuf>) -> Self {
    let repository_path = repository_path.into();
    Self {
        repository_path,
        max_findings: 50,
    }
}
/// Collects the qualified names of every graph function that is treated as a
/// "query function".
///
/// A function qualifies when its name matches `query_func_pattern()`, or,
/// failing that, when any line of its body (taken from the cached file content
/// between `line_start` and `line_end`) matches `query_pattern()`. Functions
/// whose file content is not available in the cache are only judged by name.
fn find_query_functions(&self, graph: &dyn crate::graph::GraphQuery) -> HashSet<String> {
    let mut query_funcs = HashSet::new();

    for func in graph.get_functions() {
        // The name test comes first so the cache is only consulted when needed.
        let is_query = query_func_pattern().is_match(&func.name)
            || crate::cache::global_cache()
                .get_content(std::path::Path::new(&func.file_path))
                .is_some_and(|content| {
                    let lines: Vec<&str> = content.lines().collect();
                    // `line_start` is 1-based; convert to a 0-based slice start.
                    let first = func.line_start.saturating_sub(1) as usize;
                    let last = (func.line_end as usize).min(lines.len());
                    lines
                        .get(first..last)
                        .is_some_and(|body| body.iter().any(|text| query_pattern().is_match(text)))
                });

        if is_query {
            query_funcs.insert(func.qualified_name.clone());
        }
    }

    debug!("Found {} potential query functions", query_funcs.len());
    query_funcs
}
/// Searches the callees of `func_qn` (depth-first, in the order the graph
/// returns them) for a function listed in `query_funcs`.
///
/// Returns the name of the query function when a direct callee is one, or the
/// names along the path joined with " → " when it is reached through further
/// calls. Returns `None` when nothing is found, when `depth` exceeds 5, or when
/// `func_qn` is already present in `visited`. `func_qn` itself is never tested
/// against `query_funcs`; only its callees are.
///
/// `visited` is updated with every function name that gets expanded, so a
/// caller that reuses the same set across calls will not expand a function twice.
#[allow(clippy::only_used_in_recursion)]
fn calls_query_transitively(
    &self,
    graph: &dyn crate::graph::GraphQuery,
    func_qn: &str,
    query_funcs: &HashSet<String>,
    depth: usize,
    visited: &mut HashSet<String>,
) -> Option<String> {
    const MAX_DEPTH: usize = 5;

    if depth > MAX_DEPTH {
        return None;
    }
    // `insert` returns false when the name was already recorded.
    if !visited.insert(func_qn.to_string()) {
        return None;
    }

    graph.get_callees(func_qn).into_iter().find_map(|callee| {
        if query_funcs.contains(&callee.qualified_name) {
            return Some(callee.name.clone());
        }
        self.calls_query_transitively(
            graph,
            &callee.qualified_name,
            query_funcs,
            depth + 1,
            visited,
        )
        .map(|tail| format!("{} → {}", callee.name, tail))
    })
}
/// Finds "hidden" N+1 candidates through the call graph.
///
/// A function is reported when its source (from the cached file content)
/// contains a line matching `loop_pattern()` and one of its callees either is a
/// query function itself or reaches one via `calls_query_transitively`. At most
/// one finding is produced per function, and the total is limited to
/// `max_findings`.
///
/// Functions whose file path contains any entry of `SKIPPED_PATH_FRAGMENTS`, or
/// that `is_likely_bundled_path` accepts, are ignored. Functions whose file
/// content is not in the cache are treated as having no loop.
///
/// Note: the check only establishes that the function contains a loop and has
/// such a callee; it does not verify that the call sits inside the loop.
fn find_graph_n_plus_one(&self, graph: &dyn crate::graph::GraphQuery) -> Vec<Finding> {
    // Substrings of `file_path` that exclude a function from this pass.
    const SKIPPED_PATH_FRAGMENTS: &[&str] = &[
        "/test",
        "_test.",
        "/detectors/",
        "/cli/",
        "/parsers/",
        "/mcp/",
        "/git/",
        "/ai/",
        "/reporters/",
        "/scoring/",
        "/graph/",
        "/packages/react",
        "/packages/shared",
        "/packages/scheduler",
        "/reconciler/",
        "/fiber/",
        "/forks/",
    ];

    let mut findings = Vec::new();
    let query_funcs = self.find_query_functions(graph);
    if query_funcs.is_empty() {
        return findings;
    }

    for func in graph.get_functions() {
        if findings.len() >= self.max_findings {
            break;
        }
        if SKIPPED_PATH_FRAGMENTS
            .iter()
            .any(|fragment| func.file_path.contains(*fragment))
        {
            continue;
        }
        if crate::detectors::content_classifier::is_likely_bundled_path(&func.file_path) {
            continue;
        }

        // Scan only the function's own lines; `line_start` is 1-based. Skipping
        // and taking on the line iterator avoids building a Vec of every line
        // in the file for every function.
        let has_loop = crate::cache::global_cache()
            .get_content(std::path::Path::new(&func.file_path))
            .is_some_and(|content| {
                let start = func.line_start.saturating_sub(1) as usize;
                let len = (func.line_end as usize).saturating_sub(start);
                content
                    .lines()
                    .skip(start)
                    .take(len)
                    .any(|line| loop_pattern().is_match(line))
            });
        if !has_loop {
            continue;
        }

        let mut visited = HashSet::new();
        for callee in graph.get_callees(&func.qualified_name) {
            // `calls_query_transitively` only inspects the callees of the
            // function it is given, so a callee that is itself a query function
            // must be recognised here; otherwise the most direct case
            // (loop → query helper) would never be reported.
            let call_chain = if query_funcs.contains(&callee.qualified_name) {
                callee.name.to_string()
            } else if let Some(query_chain) = self.calls_query_transitively(
                graph,
                &callee.qualified_name,
                &query_funcs,
                0,
                &mut visited,
            ) {
                format!("{} → {}", callee.name, query_chain)
            } else {
                continue;
            };

            findings.push(Finding {
                id: String::new(),
                detector: "NPlusOneDetector".to_string(),
                severity: Severity::High,
                title: format!("Hidden N+1: {} calls query in loop", func.name),
                description: format!(
                    "Function '{}' contains a loop and calls '{}' which leads to a database query.\n\n\
                     **Call chain:** {}\n\n\
                     This may cause N database queries instead of 1.",
                    func.name, callee.name, call_chain
                ),
                affected_files: vec![PathBuf::from(&func.file_path)],
                line_start: Some(func.line_start),
                line_end: Some(func.line_end),
                suggested_fix: Some(
                    "Consider:\n\
                     1. Batch the query before the loop\n\
                     2. Use eager loading/prefetching\n\
                     3. Cache results if the same query is repeated"
                        .to_string(),
                ),
                estimated_effort: Some("1 hour".to_string()),
                category: Some("performance".to_string()),
                cwe_id: None,
                why_it_matters: Some(
                    "Hidden N+1 queries across function boundaries are harder to detect \
                     but cause the same performance issues."
                        .to_string(),
                ),
                ..Default::default()
            });
            // One finding per function is enough.
            break;
        }
    }

    findings
}
}
impl Detector for NPlusOneDetector {
/// Returns this detector's name, `"n-plus-one"`.
fn name(&self) -> &'static str {
    const NAME: &str = "n-plus-one";
    NAME
}
/// Returns a one-line summary of what this detector looks for.
fn description(&self) -> &'static str {
    const DESCRIPTION: &str = "Detects N+1 query patterns";
    DESCRIPTION
}
fn detect(&self, graph: &dyn crate::graph::GraphQuery) -> Result<Vec<Finding>> {
let mut findings = vec![];
let walker = ignore::WalkBuilder::new(&self.repository_path)
.hidden(false)
.git_ignore(true)
.build();
for entry in walker.filter_map(|e| e.ok()) {
if findings.len() >= self.max_findings {
break;
}
let path = entry.path();
if !path.is_file() {
continue;
}
let path_str = path.to_string_lossy();
if crate::detectors::base::is_test_path(&path_str) {
continue;
}
if path_str.contains("/packages/react")
|| path_str.contains("/packages/shared")
|| path_str.contains("/packages/scheduler")
|| path_str.contains("/reconciler/")
|| path_str.contains("/fiber/")
|| path_str.contains("/forks/")
{
continue;
}
if crate::detectors::content_classifier::is_likely_bundled_path(&path_str) {
continue;
}
if crate::detectors::content_classifier::is_non_production_path(&path_str) {
continue;
}
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
if !matches!(ext, "py" | "js" | "ts" | "rb" | "java" | "go") {
continue;
}
if let Some(content) = crate::cache::global_cache().get_content(path) {
let mut in_loop = false;
let mut loop_line = 0;
let mut brace_depth = 0;
for (i, line) in content.lines().enumerate() {
if loop_pattern().is_match(line) {
in_loop = true;
loop_line = i + 1;
brace_depth = 0;
}
if in_loop {
brace_depth += line.matches('{').count() as i32;
brace_depth -= line.matches('}').count() as i32;
if brace_depth < 0 {
in_loop = false;
continue;
}
if query_pattern().is_match(line) {
findings.push(Finding {
id: String::new(),
detector: "NPlusOneDetector".to_string(),
severity: Severity::High,
title: "Potential N+1 query".to_string(),
description: format!(
"Database query inside loop (loop started at line {}).\n\n\
This pattern causes N database calls instead of 1.",
loop_line
),
affected_files: vec![path.to_path_buf()],
line_start: Some((i + 1) as u32),
line_end: Some((i + 1) as u32),
suggested_fix: Some(
"Use bulk fetch before loop or eager loading.".to_string(),
),
estimated_effort: Some("45 minutes".to_string()),
category: Some("performance".to_string()),
cwe_id: None,
why_it_matters: Some(
"Causes N database calls instead of 1.".to_string(),
),
..Default::default()
});
in_loop = false;
}
}
}
}
}
let graph_findings = self.find_graph_n_plus_one(graph);
let existing_locations: HashSet<(String, u32)> = findings
.iter()
.flat_map(|f| {
f.affected_files
.iter()
.map(|p| (p.to_string_lossy().to_string(), f.line_start.unwrap_or(0)))
})
.collect();
for finding in graph_findings {
let key = (
finding
.affected_files
.first()
.map(|p| p.to_string_lossy().to_string())
.unwrap_or_default(),
finding.line_start.unwrap_or(0),
);
if !existing_locations.contains(&key) && findings.len() < self.max_findings {
findings.push(finding);
}
}
info!(
"NPlusOneDetector found {} findings (source + graph)",
findings.len()
);
Ok(findings)
}
}