use std::path::PathBuf;
use std::sync::LazyLock;
use sqlx::Row;
use super::{
build_entry_point_names, build_trait_method_names, DeadConfidence, DeadFunction, LightChunk,
TRAIT_IMPL_RE,
};
use crate::parser::{ChunkType, Language};
use crate::store::helpers::{clamp_line_number, ChunkRow, ChunkSummary, StoreError};
use crate::store::Store;
impl<Mode> Store<Mode> {
/// Runs the dead-code analysis over the indexed chunks.
///
/// The whole pipeline executes on the store's runtime via `block_on`:
/// 1. load callable chunks whose name never appears as a callee
///    (`fetch_uncalled_functions`),
/// 2. load the names of known test chunks,
/// 3. discard entry points, tests and trait methods (`filter_candidates`),
/// 4. load the set of files that show call or type-edge activity
///    (`fetch_active_files`),
/// 5. grade the remaining candidates (`score_confidence`).
///
/// # Returns
/// `(confident, possibly_dead_pub)`. With `include_pub == false`, candidates
/// that look public are placed in the second list; with `include_pub == true`
/// every surviving candidate is placed in the first list.
///
/// # Errors
/// Any error raised by one of the steps above is propagated as a `StoreError`.
pub fn find_dead_code(
    &self,
    include_pub: bool,
) -> Result<(Vec<DeadFunction>, Vec<DeadFunction>), StoreError> {
    let _span = tracing::info_span!("find_dead_code", include_pub).entered();
    self.rt.block_on(async {
        // Step 1: everything callable that nobody calls by name.
        let uncalled = self.fetch_uncalled_functions().await?;
        let uncalled_count = uncalled.len();

        // Step 2: names of test chunks, collected into a set for O(1) lookups.
        let known_tests = self.find_test_chunk_names_async().await?;
        let known_tests: std::collections::HashSet<String> = known_tests.into_iter().collect();

        // Step 3: cheap, in-memory exclusions.
        let survivors = Self::filter_candidates(uncalled, &known_tests);

        // Steps 4 and 5: grade what is left against file activity.
        let files_with_activity = self.fetch_active_files().await?;
        let (confident, possibly_dead_pub) = self
            .score_confidence(survivors, &files_with_activity, include_pub)
            .await?;

        tracing::info!(
            total_uncalled = uncalled_count,
            confident = confident.len(),
            possibly_dead = possibly_dead_pub.len(),
            "Dead code analysis complete"
        );
        Ok((confident, possibly_dead_pub))
    })
}
async fn fetch_uncalled_functions(&self) -> Result<Vec<LightChunk>, StoreError> {
let callable = ChunkType::callable_sql_list();
let sql = format!(
"SELECT c.id, c.origin, c.language, c.chunk_type, c.name, c.signature,
c.line_start, c.line_end, c.parent_id
FROM chunks c
WHERE c.chunk_type IN ({callable})
AND NOT EXISTS (SELECT 1 FROM function_calls fc WHERE fc.callee_name = c.name LIMIT 1)
AND c.parent_id IS NULL
ORDER BY c.origin, c.line_start"
);
let rows: Vec<_> = sqlx::query(&sql).fetch_all(&self.pool).await?;
Ok(rows
.into_iter()
.map(|row| LightChunk {
id: row.get(0),
file: PathBuf::from(row.get::<String, _>(1)),
language: {
let raw: String = row.get(2);
raw.parse().unwrap_or_else(|_| {
tracing::warn!(raw = %raw, "Unknown language in DB, defaulting to Rust");
Language::Rust
})
},
chunk_type: {
let raw: String = row.get(3);
raw.parse().unwrap_or_else(|_| {
tracing::warn!(raw = %raw, "Unknown chunk_type in DB, defaulting to Function");
ChunkType::Function
})
},
name: row.get(4),
signature: row.get(5),
line_start: clamp_line_number(row.get::<i64, _>(6)),
line_end: clamp_line_number(row.get::<i64, _>(7)),
})
.collect())
}
/// Removes uncalled chunks that are expected to have no callers.
///
/// A chunk is dropped when any of the following holds, checked in this order:
/// * its name is one of `build_entry_point_names()`,
/// * its name is in `test_names`,
/// * `crate::is_test_chunk` accepts its name and file path,
/// * it is a `ChunkType::Method` whose name is one of
///   `build_trait_method_names()`,
/// * it is a `ChunkType::Method` whose signature matches `TRAIT_IMPL_RE`.
///
/// The relative order of the surviving chunks is preserved.
fn filter_candidates(
    uncalled: Vec<LightChunk>,
    test_names: &std::collections::HashSet<String>,
) -> Vec<LightChunk> {
    // Both name tables are built once per process and shared afterwards.
    static KNOWN_ENTRY_POINTS: LazyLock<std::collections::HashSet<&'static str>> =
        LazyLock::new(|| build_entry_point_names().into_iter().collect());
    static KNOWN_TRAIT_METHODS: LazyLock<std::collections::HashSet<&'static str>> =
        LazyLock::new(|| build_trait_method_names().into_iter().collect());
    let known_entry_points = &*KNOWN_ENTRY_POINTS;
    let known_trait_methods = &*KNOWN_TRAIT_METHODS;

    uncalled
        .into_iter()
        .filter(|chunk| {
            let name = chunk.name.as_str();
            if known_entry_points.contains(name) || test_names.contains(name) {
                return false;
            }

            let origin = chunk.file.to_string_lossy();
            if crate::is_test_chunk(&chunk.name, &origin) {
                return false;
            }

            // The trait heuristics apply to methods only: first the name
            // table, then the signature pattern.
            let trait_like_method = chunk.chunk_type == ChunkType::Method
                && (known_trait_methods.contains(name)
                    || TRAIT_IMPL_RE.is_match(&chunk.signature));
            !trait_like_method
        })
        .collect()
}
/// Collects the files that show call or type-edge activity.
///
/// The set is the union of every `file` value in `function_calls` and the
/// `origin` of every chunk that appears as `source_chunk_id` in `type_edges`.
///
/// # Errors
/// Returns a `StoreError` when the query fails.
async fn fetch_active_files(&self) -> Result<std::collections::HashSet<String>, StoreError> {
    // Continuation lines stay flush-left so the SQL text is unchanged.
    const ACTIVE_FILES_SQL: &str = "SELECT DISTINCT file FROM function_calls
UNION
SELECT DISTINCT c.origin FROM chunks c
JOIN type_edges te ON c.id = te.source_chunk_id";

    let rows: Vec<(String,)> = sqlx::query_as(ACTIVE_FILES_SQL)
        .fetch_all(&self.pool)
        .await?;

    let mut active = std::collections::HashSet::new();
    for (file,) in rows {
        active.insert(file);
    }
    Ok(active)
}
/// Loads content for the candidates, applies content-based exclusions and
/// assigns a confidence level to each remaining one.
///
/// Content and doc text are fetched from `chunks` in batches sized by
/// `max_rows_per_statement(1)`. A candidate whose row is missing is logged
/// and processed with empty content.
///
/// A candidate is skipped when it is a method whose content matches
/// `TRAIT_IMPL_RE`, or when its content contains the text `no_mangle`.
///
/// Confidence:
/// * methods -> `DeadConfidence::Low`,
/// * non-methods in a file absent from `active_files` -> `DeadConfidence::High`,
/// * all other non-methods -> `DeadConfidence::Medium`.
///
/// A candidate counts as public when its content or its signature starts
/// with `pub ` or `pub(`.
///
/// # Returns
/// `(confident, possibly_dead_pub)`: public candidates go to the second list
/// only when `include_pub` is `false`; everything else goes to the first.
///
/// # Errors
/// Returns a `StoreError` when a content query fails.
async fn score_confidence(
    &self,
    candidates: Vec<LightChunk>,
    active_files: &std::collections::HashSet<String>,
    include_pub: bool,
) -> Result<(Vec<DeadFunction>, Vec<DeadFunction>), StoreError> {
    use crate::store::helpers::sql::max_rows_per_statement;

    let wanted_ids: Vec<&str> = candidates.iter().map(|c| c.id.as_str()).collect();
    let mut content_map: std::collections::HashMap<String, (String, Option<String>)> =
        std::collections::HashMap::new();

    // One bound parameter per id, so the batch size is the per-statement row limit.
    let per_statement = max_rows_per_statement(1);
    for id_batch in wanted_ids.chunks(per_statement) {
        let placeholders = super::super::helpers::make_placeholders(id_batch.len());
        let sql = format!(
            "SELECT id, content, doc FROM chunks WHERE id IN ({})",
            placeholders
        );
        let mut lookup = sqlx::query(&sql);
        for id in id_batch {
            lookup = lookup.bind(id);
        }
        let fetched: Vec<_> = lookup.fetch_all(&self.pool).await?;
        content_map.extend(fetched.into_iter().map(|row| {
            let id: String = row.get(0);
            let content: String = row.get(1);
            let doc: Option<String> = row.get(2);
            (id, (content, doc))
        }));
    }

    let mut confident = Vec::new();
    let mut possibly_dead_pub = Vec::new();
    for light in candidates {
        let (content, doc) = content_map.remove(&light.id).unwrap_or_else(|| {
            tracing::warn!(
                chunk_id = %light.id,
                name = %light.name,
                "Content missing for dead code candidate — chunk may be stale"
            );
            (String::new(), None)
        });

        let is_method = light.chunk_type == ChunkType::Method;
        // Content-level exclusions: trait impl methods and anything mentioning `no_mangle`.
        if (is_method && TRAIT_IMPL_RE.is_match(&content)) || content.contains("no_mangle") {
            continue;
        }

        let is_pub = [content.as_str(), light.signature.as_str()]
            .iter()
            .any(|text| text.starts_with("pub ") || text.starts_with("pub("));
        let file_is_active = active_files.contains(light.file.to_string_lossy().as_ref());

        let confidence = match (is_method, file_is_active) {
            (true, _) => DeadConfidence::Low,
            (false, false) => DeadConfidence::High,
            (false, true) => DeadConfidence::Medium,
        };

        let chunk = ChunkSummary::from(ChunkRow::from_light_chunk(light, content, doc));
        let bucket = if is_pub && !include_pub {
            &mut possibly_dead_pub
        } else {
            &mut confident
        };
        bucket.push(DeadFunction { chunk, confidence });
    }
    Ok((confident, possibly_dead_pub))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::test_helpers::setup_store;
/// Uncalled functions named like entry points must not appear in either
/// list returned by `find_dead_code`.
#[test]
fn test_entry_point_exclusion() {
    let entry_names = ["main", "init", "handler", "middleware"];

    let (store, _dir) = setup_store();
    let emb = crate::embedder::Embedding::new(vec![0.0; crate::EMBEDDING_DIM]);

    // Index one private, never-called function per entry point name.
    for &name in &entry_names {
        let chunk = crate::parser::Chunk {
            id: format!("src/app.rs:1:{name}"),
            file: std::path::PathBuf::from("src/app.rs"),
            language: crate::parser::Language::Rust,
            chunk_type: crate::parser::ChunkType::Function,
            name: name.to_string(),
            signature: format!("fn {name}()"),
            content: format!("fn {name}() {{}}"),
            doc: None,
            line_start: 1,
            line_end: 3,
            content_hash: format!("{name}_hash"),
            parent_id: None,
            window_idx: None,
            parent_type_name: None,
        };
        store.upsert_chunk(&chunk, &emb, Some(12345)).unwrap();
    }

    let (confident, possibly_pub) = store.find_dead_code(true).unwrap();

    // Gather every reported name from both result lists.
    let mut reported: Vec<&str> = Vec::with_capacity(confident.len() + possibly_pub.len());
    for dead in confident.iter().chain(possibly_pub.iter()) {
        reported.push(dead.chunk.name.as_str());
    }

    for &ep in &entry_names {
        assert!(
            !reported.contains(&ep),
            "Entry point '{ep}' should be excluded from dead code"
        );
    }
}
/// An uncalled private function in a file without activity is graded High,
/// while an uncalled method is graded Low.
#[test]
fn test_confidence_assignment() {
    let (store, _dir) = setup_store();
    let emb = crate::embedder::Embedding::new(vec![0.0; crate::EMBEDDING_DIM]);

    // (id, chunk type, name, signature, content, first line, last line, content hash)
    let fixtures = [
        (
            "src/orphan.rs:1:func_hash",
            crate::parser::ChunkType::Function,
            "orphan_func",
            "fn orphan_func()",
            "fn orphan_func() {}",
            1,
            3,
            "func_hash",
        ),
        (
            "src/orphan.rs:5:meth_hash",
            crate::parser::ChunkType::Method,
            "orphan_method",
            "fn orphan_method(&self)",
            "fn orphan_method(&self) {}",
            5,
            7,
            "meth_hash",
        ),
    ];
    for (id, chunk_type, name, signature, content, line_start, line_end, content_hash) in fixtures {
        let chunk = crate::parser::Chunk {
            id: id.to_string(),
            file: std::path::PathBuf::from("src/orphan.rs"),
            language: crate::parser::Language::Rust,
            chunk_type,
            name: name.to_string(),
            signature: signature.to_string(),
            content: content.to_string(),
            doc: None,
            line_start,
            line_end,
            content_hash: content_hash.to_string(),
            parent_id: None,
            window_idx: None,
            parent_type_name: None,
        };
        store.upsert_chunk(&chunk, &emb, Some(12345)).unwrap();
    }

    let (confident, _) = store.find_dead_code(true).unwrap();

    // (name, expected confidence, message when missing, message when mis-graded)
    let expectations = [
        (
            "orphan_func",
            DeadConfidence::High,
            "orphan_func should be in dead code list",
            "Private function in inactive file should be High confidence",
        ),
        (
            "orphan_method",
            DeadConfidence::Low,
            "orphan_method should be in dead code list",
            "Method should be Low confidence",
        ),
    ];
    for (name, expected, missing_msg, wrong_msg) in expectations {
        let found = confident.iter().find(|d| d.chunk.name == name);
        assert!(found.is_some(), "{missing_msg}");
        assert_eq!(found.unwrap().confidence, expected, "{wrong_msg}");
    }
}
}