use crate::detectors::base::{Detector, DetectorConfig};
use crate::graph::GraphQueryExt;
use crate::models::{deterministic_finding_id, Finding, Severity};
use anyhow::Result;
use regex::Regex;
use std::collections::HashSet;
use std::path::PathBuf;
use std::sync::LazyLock;
use tracing::info;
/// Matches the keyword pair that opens an async function declaration:
/// `async def`, `async function` or `async fn` (case-insensitive, any amount
/// of whitespace between the two words).
static ASYNC_FUNC: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"(?i)(async\s+def|async\s+function|async\s+fn)";
    Regex::new(pattern).expect("valid regex")
});
/// Matches the synchronous calls this detector treats as blocking: sleeps,
/// `*Sync` file/process helpers, `requests`/`urllib` HTTP calls, `subprocess`
/// and `os.system`, `std::thread::sleep`, `std::fs::read`/`write`, and
/// `open(...).read`. Matching is case-insensitive and purely textual.
static BLOCKING: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"(?i)(time\.sleep|Thread\.sleep|readFileSync|writeFileSync|execSync|spawnSync|requests\.(get|post|put|delete|head|patch)|urllib\.request|urlopen|subprocess\.(run|call|check_output)|os\.system|std::thread::sleep|std::fs::(read|write)|open\([^)]+\)\.read)";
    Regex::new(pattern).expect("valid regex")
});
/// Returns a short hint naming the async replacement for `blocking_call`.
///
/// `blocking_call` is the text matched by [`BLOCKING`]. Because that regex is
/// case-insensitive, the lookup is done on the lowercased text. The table is
/// ordered and the first entry with a matching needle wins; when nothing
/// matches, a generic hint is returned.
fn get_async_alternative(blocking_call: &str) -> &'static str {
    /// `(needles, hint)` pairs, checked top to bottom.
    const ALTERNATIVES: &[(&[&str], &str)] = &[
        (&["time.sleep"], "asyncio.sleep()"),
        (&["thread.sleep"], "await new Promise(r => setTimeout(r, ms))"),
        (&["readfilesync"], "await fs.promises.readFile()"),
        (&["writefilesync"], "await fs.promises.writeFile()"),
        (
            &["execsync", "spawnsync"],
            "await exec() from child_process/promises or execa",
        ),
        // Only HTTP clients belong here: aiofiles is a file-I/O library and
        // cannot replace a `requests` call.
        (&["requests."], "aiohttp or httpx.AsyncClient"),
        (&["urllib", "urlopen"], "aiohttp.ClientSession()"),
        (
            &["subprocess", "os.system"],
            "asyncio.create_subprocess_exec()",
        ),
        (
            &["std::thread::sleep"],
            "tokio::time::sleep() or async-std equivalent",
        ),
        (&["std::fs"], "tokio::fs or async-std::fs"),
        (&["open("], "aiofiles.open() for Python"),
    ];

    let call_lower = blocking_call.to_lowercase();
    ALTERNATIVES
        .iter()
        .find(|(needles, _)| needles.iter().any(|needle| call_lower.contains(*needle)))
        .map_or("Use async equivalent", |(_, hint)| *hint)
}
/// Detector that flags synchronous, blocking calls made inside async functions.
pub struct SyncInAsyncDetector {
    /// Not read anywhere in this file, hence the `dead_code` allowance.
    /// NOTE(review): presumably the repository root stored by the
    /// `detector_new!` constructor — confirm against the macro.
    #[allow(dead_code)]
    repository_path: PathBuf,
    /// Finding budget: `detect` stops visiting further files once it has
    /// collected at least this many findings.
    max_findings: usize,
}
impl SyncInAsyncDetector {
    // Expands to the constructor used as `Self::new(..)` elsewhere in this file.
    // NOTE(review): `50` is presumably the default `max_findings` — confirm
    // against the macro definition.
    crate::detectors::detector_new!(50);

    /// Collects the qualified names of all graph functions whose own source
    /// lines contain a call matched by [`BLOCKING`].
    ///
    /// The masked content of each file is fetched from the global cache once
    /// and reused for every function in that file. When the content is
    /// unavailable the file contributes no lines, so none of its functions
    /// end up in the result.
    fn find_blocking_functions(&self, graph: &dyn crate::graph::GraphQuery) -> HashSet<String> {
        let interner = graph.interner();
        let mut lines_by_file: std::collections::HashMap<String, Vec<String>> =
            std::collections::HashMap::new();
        let mut blocking = HashSet::new();

        for func in graph.get_functions_shared().iter() {
            let source_lines = lines_by_file
                .entry(func.path(interner).to_string())
                .or_insert_with(|| {
                    crate::cache::global_cache()
                        .masked_content(std::path::Path::new(func.path(interner)))
                        .map(|text| text.lines().map(String::from).collect())
                        .unwrap_or_default()
                });

            // Graph line numbers are 1-based; the end is clamped to the file
            // length and an inverted or out-of-range span yields no lines.
            let first = func.line_start.saturating_sub(1) as usize;
            let last = (func.line_end as usize).min(source_lines.len());
            let body = source_lines.get(first..last).unwrap_or(&[]);

            if body.iter().any(|line| BLOCKING.is_match(line)) {
                blocking.insert(func.qn(interner).to_string());
            }
        }

        blocking
    }

    /// Returns the names of the direct callees of `func` whose qualified name
    /// is in `blocking_funcs`. Only one call level is inspected.
    fn check_transitive_blocking(
        &self,
        graph: &dyn crate::graph::GraphQuery,
        func: &crate::graph::CodeNode,
        blocking_funcs: &HashSet<String>,
    ) -> Vec<String> {
        let interner = graph.interner();
        graph
            .get_callees(func.qn(interner))
            .into_iter()
            .filter(|callee| blocking_funcs.contains(callee.qn(interner)))
            .map(|callee| callee.node_name(interner).to_string())
            .collect()
    }

    /// Looks up the graph function covering `line` of `file_path` and returns
    /// its name, or `None` when the graph has no function at that position.
    fn find_async_function(
        graph: &dyn crate::graph::GraphQuery,
        file_path: &str,
        line: u32,
    ) -> Option<String> {
        let func = graph.find_function_at(file_path, line)?;
        let interner = crate::graph::interner::global_interner();
        Some(func.node_name(interner).to_string())
    }
}
impl Detector for SyncInAsyncDetector {
fn name(&self) -> &'static str {
"sync-in-async"
}
fn description(&self) -> &'static str {
"Detects blocking calls in async functions"
}
fn file_extensions(&self) -> &'static [&'static str] {
&["py", "js", "ts", "jsx", "tsx"]
}
fn detect(
&self,
ctx: &crate::detectors::analysis_context::AnalysisContext,
) -> Result<Vec<Finding>> {
let graph = ctx.graph;
let files = &ctx.as_file_provider();
let mut findings = vec![];
let mut blocking_funcs: Option<HashSet<String>> = None;
for path in files.files_with_extensions(&["py", "js", "ts", "jsx", "tsx"]) {
if findings.len() >= self.max_findings {
break;
}
let path_str = path.to_string_lossy().to_string();
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
if path_str.contains("/detectors/") {
continue;
}
if crate::detectors::content_classifier::is_non_production_path(&path_str) {
continue;
}
if let Some(content) = files.content(path) {
let lines: Vec<&str> = content.lines().collect();
let mut in_async = false;
let mut async_indent = 0;
let mut current_async_name = String::new();
for (i, line) in lines.iter().enumerate() {
let prev_line = if i > 0 { Some(lines[i - 1]) } else { None };
if crate::detectors::is_line_suppressed(line, prev_line) {
continue;
}
let current_indent = line.chars().take_while(|c| c.is_whitespace()).count();
if ASYNC_FUNC.is_match(line) {
in_async = true;
async_indent = current_indent;
if let Some(name) =
Self::find_async_function(graph, &path_str, (i + 1) as u32)
{
current_async_name = name;
}
}
if in_async
&& ext == "py"
&& !line.trim().is_empty()
&& current_indent <= async_indent
&& i > 0
&& !ASYNC_FUNC.is_match(line)
{
in_async = false;
}
if !in_async {
continue;
}
if let Some(m) = BLOCKING.find(line) {
let blocking_call = m.as_str();
let alternative = get_async_alternative(blocking_call);
let mut notes = Vec::new();
if !current_async_name.is_empty() {
notes.push(format!("📦 In async function: `{}`", current_async_name));
}
if let Some(func) = graph.find_function_at(&path_str, (i + 1) as u32) {
let bf = blocking_funcs
.get_or_insert_with(|| self.find_blocking_functions(graph));
let transitive = self.check_transitive_blocking(graph, &func, bf);
if !transitive.is_empty() {
notes.push(format!(
"⚠️ Also calls blocking functions: {}",
transitive.join(", ")
));
}
}
let context_notes = if notes.is_empty() {
String::new()
} else {
format!("\n\n**Analysis:**\n{}", notes.join("\n"))
};
let severity = if blocking_call.contains("sleep")
|| blocking_call.contains("Sync")
|| blocking_call.contains("subprocess")
{
Severity::High } else {
Severity::Medium
};
findings.push(Finding {
id: String::new(),
detector: "SyncInAsyncDetector".to_string(),
severity,
title: format!("Blocking call `{}` in async function", blocking_call),
description: format!(
"Synchronous blocking call inside async function will block the event loop, \
preventing other async tasks from running.{}",
context_notes
),
affected_files: vec![path.to_path_buf()],
line_start: Some((i + 1) as u32),
line_end: Some((i + 1) as u32),
suggested_fix: Some(format!(
"Replace with async alternative: `{}`\n\n\
Example:\n\
```python\n\
# Instead of: time.sleep(1)\n\
await asyncio.sleep(1)\n\
\n\
# Instead of: requests.get(url)\n\
async with aiohttp.ClientSession() as session:\n\
async with session.get(url) as response:\n\
data = await response.json()\n\
```",
alternative
)),
estimated_effort: Some("20 minutes".to_string()),
category: Some("performance".to_string()),
cwe_id: Some("CWE-400".to_string()),
why_it_matters: Some(
"Blocking calls in async code prevent the event loop from processing other tasks. \
This defeats the purpose of async/await and can cause the entire application to hang.".to_string()
),
..Default::default()
});
}
}
}
}
info!(
"SyncInAsyncDetector found {} findings (graph-aware)",
findings.len()
);
Ok(findings)
}
}
impl crate::detectors::RegisteredDetector for SyncInAsyncDetector {
    /// Builds the detector from the repository path carried by `init` and
    /// returns it as a shared `Detector` trait object.
    fn create(init: &crate::detectors::DetectorInit) -> std::sync::Arc<dyn Detector> {
        let detector = Self::new(init.repo_path);
        std::sync::Arc::new(detector)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::detectors::analysis_context::AnalysisContext;
    use crate::graph::builder::GraphBuilder;

    /// Python module whose coroutine calls `time.sleep` directly.
    const ASYNC_WITH_SLEEP: &str = "import asyncio\nimport time\n\nasync def handle_request():\n data = await fetch_data()\n time.sleep(1)\n return data\n";

    /// Python module where `time.sleep` is only called from a plain function.
    const SYNC_WITH_SLEEP: &str =
        "import time\n\ndef slow_function():\n time.sleep(1)\n return 42\n";

    /// A blocking sleep inside `async def` must produce a finding naming it.
    #[test]
    fn test_detects_time_sleep_in_async_def() {
        let graph = GraphBuilder::new().freeze();
        let ctx =
            AnalysisContext::test_with_mock_files(&graph, vec![("server.py", ASYNC_WITH_SLEEP)]);

        let findings = SyncInAsyncDetector::new("/mock/repo")
            .detect(&ctx)
            .expect("detection should succeed");

        assert!(
            !findings.is_empty(),
            "Should detect time.sleep() inside async def"
        );
        let mentions_sleep = findings
            .iter()
            .any(|finding| finding.title.contains("time.sleep"));
        assert!(mentions_sleep, "Finding should mention time.sleep");
    }

    /// The same call in a regular function is outside the detector's scope.
    #[test]
    fn test_no_finding_for_sync_function() {
        let graph = GraphBuilder::new().freeze();
        let ctx =
            AnalysisContext::test_with_mock_files(&graph, vec![("utils.py", SYNC_WITH_SLEEP)]);

        let findings = SyncInAsyncDetector::new("/mock/repo")
            .detect(&ctx)
            .expect("detection should succeed");

        assert!(
            findings.is_empty(),
            "Should not flag time.sleep() in a regular (non-async) function, got: {:?}",
            findings
        );
    }
}