use std::collections::HashMap;
use std::path::{Path, PathBuf};
use indexmap::IndexMap;
use serde::{Deserialize, Serialize};
use walkdir::WalkDir;
use crate::analysis::dead::{
collect_all_functions, dead_code_analysis, dead_code_analysis_refcount,
};
use crate::analysis::refcount::count_identifiers_in_tree;
use crate::ast::extract::extract_file;
use crate::ast::parser::parse_file;
use crate::callgraph::build_project_call_graph;
use crate::error::TldrError;
use crate::types::{Language, ModuleInfo, ProjectCallGraph};
use crate::TldrResult;
/// Visibility class assigned to a function purely from its name.
///
/// The classification is produced by [`Visibility::from_name`], which looks at
/// leading/trailing underscores of the last dot-separated name segment.
/// Serialized in `snake_case` (see the `serde` attribute below).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Visibility {
/// No leading underscore, or a `__dunder__` style name.
Public,
/// Exactly one leading underscore.
Private,
/// Two or more leading underscores that do not form a `__dunder__` name.
Internal,
}
impl Visibility {
    /// Classifies a (possibly dotted) function name by its underscore prefix.
    ///
    /// Only the segment after the last `.` is inspected, so `Class._helper`
    /// is judged as `_helper`.
    ///
    /// * no leading underscore -> [`Visibility::Public`]
    /// * one leading underscore -> [`Visibility::Private`]
    /// * two or more leading underscores -> [`Visibility::Internal`], unless the
    ///   segment also ends with `__` and is longer than four bytes (a
    ///   `__dunder__` name), which is [`Visibility::Public`]
    pub fn from_name(name: &str) -> Self {
        // '.' is a single byte, so slicing just past it is always a valid boundary.
        let leaf = match name.rfind('.') {
            Some(dot) => &name[dot + 1..],
            None => name,
        };

        let leading_underscores = leaf.bytes().take_while(|byte| *byte == b'_').count();
        match leading_underscores {
            0 => Visibility::Public,
            1 => Visibility::Private,
            _ => {
                // The length guard keeps a bare `____` from counting as a dunder name.
                let is_dunder = leaf.len() > 4 && leaf.ends_with("__");
                if is_dunder {
                    Visibility::Public
                } else {
                    Visibility::Internal
                }
            }
        }
    }
}
/// Why a function was reported as dead.
///
/// Serialized in `snake_case` (see the `serde` attribute below).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DeadReason {
/// The reason attached to every entry built by `transform_core_report`.
NeverCalled,
// NOTE(review): nothing in this file constructs this variant; presumably it
// is meant for functions reachable only from other dead functions — confirm.
OnlyCalledByDead,
}
/// A single function reported as dead, with the location and classification
/// attached by `transform_core_report`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeadFunction {
/// Function name as reported by the core analysis (methods use `Class.method`
/// in the line index built by `collect_function_lines`).
pub name: String,
/// File the function was found in.
pub file: PathBuf,
/// Definition line; `0` when the `(file, name)` pair is missing from the line index.
pub line: usize,
/// Visibility derived from the name via `Visibility::from_name`.
pub visibility: Visibility,
/// Why the function is considered dead.
pub reason: DeadReason,
}
/// Aggregate counters describing a dead-code analysis run.
///
/// `Default` yields an all-zero summary (`0` for every counter, `0.0` for the
/// percentage); it is derived because that is exactly each field's own default.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DeadCodeSummary {
    /// Number of functions reported as dead.
    pub total_dead: usize,
    /// Number of functions that were analyzed.
    pub total_functions: usize,
    /// `total_dead / total_functions * 100`, or `0.0` when nothing was analyzed.
    pub dead_percentage: f64,
    /// Dead functions classified as `Visibility::Public`.
    pub dead_public: usize,
    /// Dead functions classified as `Visibility::Private` or `Visibility::Internal`.
    pub dead_private: usize,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeadCodeReport {
pub functions_analyzed: usize,
pub dead_count: usize,
pub dead_percentage: f64,
pub dead_functions: Vec<DeadFunction>,
pub by_file: IndexMap<PathBuf, Vec<DeadFunction>>,
pub summary: DeadCodeSummary,
}
impl Default for DeadCodeReport {
fn default() -> Self {
Self {
functions_analyzed: 0,
dead_count: 0,
dead_percentage: 0.0,
dead_functions: Vec::new(),
by_file: IndexMap::new(),
summary: DeadCodeSummary::default(),
}
}
}
/// Runs the reference-count based dead-code analysis on a file or directory.
///
/// * `path` - file or directory to analyze.
/// * `language` - language to analyze; when `None` it is detected from `path`.
/// * `entry_points` - entry-point patterns forwarded to the core analysis. An
///   empty slice is forwarded as `None`.
///
/// # Errors
///
/// Returns an error when the language has to be detected and detection fails,
/// or when the core analysis reports one.
pub fn analyze_dead_code(
    path: &Path,
    language: Option<Language>,
    entry_points: &[&str],
) -> TldrResult<DeadCodeReport> {
    let lang = if let Some(explicit) = language {
        explicit
    } else {
        detect_language(path)?
    };

    let module_infos = collect_module_infos_for_dead(path, lang);
    let ref_counts = build_refcounts(path, lang);

    // The core analysis distinguishes "no patterns supplied" (None) from a list.
    let owned_patterns: Vec<String> = entry_points.iter().map(|p| (*p).to_owned()).collect();
    let custom_entries = Some(owned_patterns.as_slice()).filter(|list| !list.is_empty());

    let all_functions = collect_all_functions(&module_infos);
    let function_lines = collect_function_lines(&module_infos);

    let core_report = dead_code_analysis_refcount(&all_functions, &ref_counts, custom_entries)?;
    transform_core_report(&core_report, &function_lines)
}
/// Runs the call-graph based dead-code analysis using a pre-built call graph
/// and already extracted module information.
///
/// * `call_graph` - project call graph handed to the core analysis.
/// * `module_infos` - per-file module information; supplies both the function
///   list and the definition lines.
/// * `entry_points` - entry-point patterns forwarded to the core analysis. An
///   empty slice is forwarded as `None`.
///
/// # Errors
///
/// Propagates any error reported by the core analysis.
pub fn analyze_dead_code_with_graph(
    call_graph: &ProjectCallGraph,
    module_infos: &[(PathBuf, ModuleInfo)],
    entry_points: &[&str],
) -> TldrResult<DeadCodeReport> {
    let owned_patterns: Vec<String> = entry_points.iter().map(|p| String::from(*p)).collect();
    let custom_entries = match owned_patterns.as_slice() {
        [] => None,
        listed => Some(listed),
    };

    let all_functions = collect_all_functions(module_infos);
    let function_lines = collect_function_lines(module_infos);

    let core_report = dead_code_analysis(call_graph, &all_functions, custom_entries)?;
    transform_core_report(&core_report, &function_lines)
}
/// Runs the reference-count based dead-code analysis with caller-supplied
/// module information.
///
/// * `path` - file or directory scanned to build the identifier reference counts.
/// * `language` - language used when counting identifiers.
/// * `module_infos` - per-file module information; supplies both the function
///   list and the definition lines.
/// * `entry_points` - entry-point patterns forwarded to the core analysis. An
///   empty slice is forwarded as `None`.
///
/// # Errors
///
/// Propagates any error reported by the core analysis.
pub fn analyze_dead_code_with_refcount(
    path: &Path,
    language: Language,
    module_infos: &[(PathBuf, ModuleInfo)],
    entry_points: &[&str],
) -> TldrResult<DeadCodeReport> {
    let mut owned_patterns: Vec<String> = Vec::with_capacity(entry_points.len());
    for pattern in entry_points {
        owned_patterns.push(pattern.to_string());
    }
    let custom_entries = Some(owned_patterns.as_slice()).filter(|list| !list.is_empty());

    let all_functions = collect_all_functions(module_infos);
    let function_lines = collect_function_lines(module_infos);
    let ref_counts = build_refcounts(path, language);

    let core_report = dead_code_analysis_refcount(&all_functions, &ref_counts, custom_entries)?;
    transform_core_report(&core_report, &function_lines)
}
/// Converts the core analysis result into the public [`DeadCodeReport`].
///
/// Each dead function reference is enriched with its definition line (looked
/// up in `function_lines`; `0` when the `(file, name)` pair is unknown) and a
/// [`Visibility`] derived from its name. Every entry is reported with
/// [`DeadReason::NeverCalled`].
///
/// The flat list is sorted by file and then by line. `by_file` is built from
/// that sorted list, so its keys follow the same file order and each group is
/// ordered by line, regardless of the order in which the core report listed
/// the functions.
///
/// # Errors
///
/// The visible body always returns `Ok`; the `Result` return type is kept so
/// callers are unaffected.
fn transform_core_report(
    core_report: &crate::types::DeadCodeReport,
    function_lines: &HashMap<(PathBuf, String), usize>,
) -> TldrResult<DeadCodeReport> {
    let mut dead_functions: Vec<DeadFunction> = Vec::new();
    for func_ref in &core_report.dead_functions {
        let line = function_lines
            .get(&(func_ref.file.clone(), func_ref.name.clone()))
            .copied()
            .unwrap_or(0);
        dead_functions.push(DeadFunction {
            name: func_ref.name.clone(),
            file: func_ref.file.clone(),
            line,
            visibility: Visibility::from_name(&func_ref.name),
            reason: DeadReason::NeverCalled,
        });
    }

    // Stable sort: entries sharing file and line keep the core report's order.
    dead_functions.sort_by(|a, b| a.file.cmp(&b.file).then_with(|| a.line.cmp(&b.line)));

    // Group after sorting so the map's key order matches the flat list instead
    // of depending on the order of the core report.
    let mut by_file: IndexMap<PathBuf, Vec<DeadFunction>> = IndexMap::new();
    let mut dead_public = 0;
    let mut dead_private = 0;
    for dead_func in &dead_functions {
        match dead_func.visibility {
            Visibility::Public => dead_public += 1,
            Visibility::Private | Visibility::Internal => dead_private += 1,
        }
        by_file
            .entry(dead_func.file.clone())
            .or_default()
            .push(dead_func.clone());
    }

    let total_functions = core_report.total_functions;
    let dead_count = dead_functions.len();
    let dead_percentage = if total_functions > 0 {
        (dead_count as f64 / total_functions as f64) * 100.0
    } else {
        // Avoid dividing by zero when nothing was analyzed.
        0.0
    };

    Ok(DeadCodeReport {
        functions_analyzed: total_functions,
        dead_count,
        dead_percentage,
        dead_functions,
        by_file,
        summary: DeadCodeSummary {
            total_dead: dead_count,
            total_functions,
            dead_percentage,
            dead_public,
            dead_private,
        },
    })
}
/// Counts identifier occurrences across `path`, summed over all scanned files.
///
/// When `path` is a file, only that file is parsed (its extension is not
/// checked). Otherwise the directory tree is walked, following symlinks, and
/// every regular file whose extension is listed by `language.extensions()` is
/// parsed. Unreadable directory entries and files that fail to parse are
/// skipped silently.
///
/// Identifiers are counted with the `language` argument; the language reported
/// by `parse_file` itself is ignored.
fn build_refcounts(path: &Path, language: Language) -> HashMap<String, usize> {
    /// Parses `file` and adds its identifier counts to `totals`; a file that
    /// fails to parse contributes nothing.
    fn tally_file(file: &Path, language: Language, totals: &mut HashMap<String, usize>) {
        if let Ok((tree, source, _detected)) = parse_file(file) {
            for (name, count) in count_identifiers_in_tree(&tree, source.as_bytes(), language) {
                *totals.entry(name).or_insert(0) += count;
            }
        }
    }

    let mut totals: HashMap<String, usize> = HashMap::new();

    if path.is_file() {
        tally_file(path, language, &mut totals);
        return totals;
    }

    let wanted = language.extensions();
    let walker = WalkDir::new(path)
        .follow_links(true)
        .into_iter()
        .filter_map(Result::ok);
    for entry in walker {
        let candidate = entry.path();
        if !candidate.is_file() {
            continue;
        }
        // The extension list uses a leading dot, `Path::extension` does not.
        let dotted = match candidate.extension().and_then(|e| e.to_str()) {
            Some(ext) => format!(".{}", ext),
            None => continue,
        };
        if wanted.contains(&dotted.as_str()) {
            tally_file(candidate, language, &mut totals);
        }
    }

    totals
}
/// Extracts module information for `path`.
///
/// When `path` is a file, that single file is extracted with its parent
/// directory as the base (its extension is not checked). Otherwise the
/// directory tree is walked, following symlinks, and every regular file whose
/// extension is listed by `language.extensions()` is extracted with `path` as
/// the base. Unreadable directory entries and files whose extraction fails are
/// skipped silently.
fn collect_module_infos_for_dead(path: &Path, language: Language) -> Vec<(PathBuf, ModuleInfo)> {
    if path.is_file() {
        return match extract_file(path, path.parent()) {
            Ok(info) => vec![(path.to_path_buf(), info)],
            Err(_) => Vec::new(),
        };
    }

    let wanted = language.extensions();
    WalkDir::new(path)
        .follow_links(true)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|entry| entry.path().is_file())
        .filter(|entry| {
            // The extension list uses a leading dot, `Path::extension` does not.
            entry
                .path()
                .extension()
                .and_then(|e| e.to_str())
                .map_or(false, |ext| wanted.contains(&format!(".{}", ext).as_str()))
        })
        .filter_map(|entry| {
            extract_file(entry.path(), Some(path))
                .ok()
                .map(|info| (entry.path().to_path_buf(), info))
        })
        .collect()
}
/// Builds the project call graph for `path` and then collects the module
/// information for the same path, returning both.
///
/// # Errors
///
/// Propagates the error from `build_project_call_graph`; module information is
/// not collected in that case.
// Nothing in this file calls this helper, hence the lint allowance.
#[allow(dead_code)]
fn build_call_graph_and_collect(
    path: &Path,
    language: Language,
) -> TldrResult<(ProjectCallGraph, Vec<(PathBuf, ModuleInfo)>)> {
    let graph = build_project_call_graph(path, language, None, true)?;
    Ok((graph, collect_module_infos_for_dead(path, language)))
}
/// Builds a `(file, function name) -> definition line` index.
///
/// Free functions are keyed by their plain name, methods by `Class.method`.
/// When the same key occurs more than once, the entry inserted last wins
/// (within a file, methods are inserted after free functions).
fn collect_function_lines(
    module_infos: &[(PathBuf, ModuleInfo)],
) -> HashMap<(PathBuf, String), usize> {
    let mut index: HashMap<(PathBuf, String), usize> = HashMap::new();

    for (source_file, module) in module_infos {
        index.extend(module.functions.iter().map(|function| {
            (
                (source_file.clone(), function.name.clone()),
                function.line_number as usize,
            )
        }));

        index.extend(module.classes.iter().flat_map(|class| {
            class.methods.iter().map(move |method| {
                let qualified = format!("{}.{}", class.name, method.name);
                ((source_file.clone(), qualified), method.line_number as usize)
            })
        }));
    }

    index
}
/// Detects the language to analyze for `path`.
///
/// * A file with an extension is mapped through `Language::from_extension`.
/// * Anything else (a directory, or a file without an extension) is walked up
///   to three levels deep and the language with the most files wins. When
///   several languages share the highest count, the one whose first file was
///   encountered earliest during the walk is chosen, so the result does not
///   depend on `HashMap` iteration order.
///
/// # Errors
///
/// * `TldrError::UnsupportedLanguage` when `path` is a file whose extension is
///   not recognized.
/// * `TldrError::NoSupportedFiles` when the walk finds no file with a
///   recognized extension.
fn detect_language(path: &Path) -> TldrResult<Language> {
    if path.is_file() {
        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
            return Language::from_extension(ext)
                .ok_or_else(|| TldrError::UnsupportedLanguage(ext.to_string()));
        }
    }

    // language -> (number of files, rank of first sighting during the walk)
    let mut tallies: HashMap<Language, (usize, usize)> = HashMap::new();
    for entry in WalkDir::new(path)
        .max_depth(3)
        .into_iter()
        .filter_map(|e| e.ok())
    {
        if !entry.file_type().is_file() {
            continue;
        }
        if let Some(ext) = entry.path().extension().and_then(|e| e.to_str()) {
            if let Some(lang) = Language::from_extension(ext) {
                // The current map size is a fresh rank for a language not seen before.
                let next_rank = tallies.len();
                tallies.entry(lang).or_insert((0, next_rank)).0 += 1;
            }
        }
    }

    tallies
        .into_iter()
        // Highest count wins; on equal counts the smaller (earlier) rank wins.
        // Ranks are unique, so the maximum never depends on iteration order.
        .max_by_key(|(_, (count, first_seen))| (*count, std::cmp::Reverse(*first_seen)))
        .map(|(lang, _)| lang)
        .ok_or_else(|| TldrError::NoSupportedFiles(path.to_path_buf()))
}
// Unit tests for name-based visibility, the default report, and the
// refcount-based analysis entry points. The analysis tests write small Python
// fixtures into a temporary directory.
#[cfg(test)]
mod tests {
use super::*;
// Underscore conventions: plain and dunder names are public, one leading
// underscore is private, two leading underscores (non-dunder) are internal.
#[test]
fn test_visibility_from_name() {
assert_eq!(Visibility::from_name("public_func"), Visibility::Public);
assert_eq!(Visibility::from_name("MyClass.method"), Visibility::Public);
assert_eq!(Visibility::from_name("__dunder__"), Visibility::Public);
assert_eq!(Visibility::from_name("__init__"), Visibility::Public);
assert_eq!(Visibility::from_name("_private_func"), Visibility::Private);
assert_eq!(
Visibility::from_name("MyClass._private"),
Visibility::Private
);
assert_eq!(
Visibility::from_name("__internal_func"),
Visibility::Internal
);
assert_eq!(Visibility::from_name("__mangled"), Visibility::Internal);
}
// A default report is empty: zero counters and no dead functions.
#[test]
fn test_dead_code_report_default() {
let report = DeadCodeReport::default();
assert_eq!(report.functions_analyzed, 0);
assert_eq!(report.dead_count, 0);
assert_eq!(report.dead_percentage, 0.0);
assert!(report.dead_functions.is_empty());
}
// `helper` is referenced from `main_func`, so it must not be reported dead.
#[test]
fn test_analyze_dead_code_refcount_rescues_called() {
use std::fs;
use tempfile::TempDir;
let dir = TempDir::new().unwrap();
let content = r#"
def helper():
return 42
def main_func():
return helper()
"#;
fs::write(dir.path().join("example.py"), content).unwrap();
let result = analyze_dead_code(dir.path(), Some(Language::Python), &[]);
assert!(result.is_ok(), "analyze_dead_code should succeed");
let report = result.unwrap();
let dead_names: Vec<&str> = report
.dead_functions
.iter()
.map(|f| f.name.as_str())
.collect();
assert!(
!dead_names.contains(&"helper"),
"helper should NOT be dead (refcount > 1), but got dead_names: {:?}",
dead_names
);
}
// `_unused_helper` is defined but never referenced, so it must be reported dead.
#[test]
fn test_analyze_dead_code_refcount_flags_unreferenced() {
use std::fs;
use tempfile::TempDir;
let dir = TempDir::new().unwrap();
let content = r#"
def _unused_helper():
return 42
def main_func():
return 99
"#;
fs::write(dir.path().join("example.py"), content).unwrap();
let result = analyze_dead_code(dir.path(), Some(Language::Python), &[]);
assert!(result.is_ok(), "analyze_dead_code should succeed");
let report = result.unwrap();
let dead_names: Vec<&str> = report
.dead_functions
.iter()
.map(|f| f.name.as_str())
.collect();
assert!(
dead_names.contains(&"_unused_helper"),
"_unused_helper should be dead (refcount == 1, private), got dead_names: {:?}",
dead_names
);
}
// Exercises `analyze_dead_code_with_refcount` with caller-built module infos
// and explicit entry-point patterns: `_orphan` is dead, `used_func` is not.
#[test]
fn test_analyze_dead_code_with_refcount_api() {
use std::fs;
use tempfile::TempDir;
let dir = TempDir::new().unwrap();
let content = r#"
def _orphan():
return 1
def used_func():
return 2
def caller():
return used_func()
"#;
fs::write(dir.path().join("mod.py"), content).unwrap();
// Build the module infos by hand, the way an external caller would.
let module_infos = {
let mut infos = Vec::new();
for entry in walkdir::WalkDir::new(dir.path())
.into_iter()
.filter_map(|e| e.ok())
{
if entry.path().extension().map(|e| e == "py").unwrap_or(false) {
if let Ok(info) =
crate::ast::extract::extract_file(entry.path(), Some(dir.path()))
{
infos.push((entry.path().to_path_buf(), info));
}
}
}
infos
};
let result = analyze_dead_code_with_refcount(
dir.path(),
Language::Python,
&module_infos,
&["main", "test_"],
);
assert!(
result.is_ok(),
"analyze_dead_code_with_refcount should succeed"
);
let report = result.unwrap();
let dead_names: Vec<&str> = report
.dead_functions
.iter()
.map(|f| f.name.as_str())
.collect();
assert!(
dead_names.contains(&"_orphan"),
"_orphan should be dead, got: {:?}",
dead_names
);
assert!(
!dead_names.contains(&"used_func"),
"used_func should NOT be dead (referenced), got: {:?}",
dead_names
);
}
// The refcount path works without a call graph: a single-file project still
// yields a report with at least one analyzed function.
#[test]
fn test_dead_code_with_refcount_no_cg_required() {
use std::fs;
use tempfile::TempDir;
let dir = TempDir::new().unwrap();
let content = "def _lonely():\n pass\n";
fs::write(dir.path().join("solo.py"), content).unwrap();
let module_infos = {
let mut infos = Vec::new();
if let Ok(info) =
crate::ast::extract::extract_file(&dir.path().join("solo.py"), Some(dir.path()))
{
infos.push((dir.path().join("solo.py"), info));
}
infos
};
let result =
analyze_dead_code_with_refcount(dir.path(), Language::Python, &module_infos, &[]);
assert!(result.is_ok());
let report = result.unwrap();
assert!(report.functions_analyzed > 0);
}
}