use std::collections::HashMap;
use std::fs;
use std::path::PathBuf;
use anyhow::{Context, Result};
use regex::Regex;
use toml_edit::{DocumentMut, Item};
use walkdir::WalkDir;
use crate::models::CrateReference;
use crate::utils::is_std_crate;
pub struct DependencyAnalyzer {
project_root: PathBuf,
debug: bool,
}
impl DependencyAnalyzer {
pub fn new(project_root: PathBuf) -> Self {
Self {
project_root,
debug: false,
}
}
pub fn with_debug(project_root: PathBuf, debug: bool) -> Self {
Self {
project_root,
debug,
}
}
pub fn analyze_dependencies(&self) -> Result<HashMap<String, CrateReference>> {
let mut crate_refs = HashMap::new();
let use_regex = Regex::new(r"^\s*use\s+([a-zA-Z_][a-zA-Z0-9_]*(?:::[a-zA-Z0-9_]*)*)")?;
let extern_regex = Regex::new(r"^\s*extern\s+crate\s+([a-zA-Z_][a-zA-Z0-9_]*)")?;
self.load_existing_dependencies(&mut crate_refs)?;
for entry in WalkDir::new(&self.project_root) {
let entry = entry?;
let path = entry.path();
if path.to_string_lossy().contains("tests/")
|| path.file_name().is_some_and(|f| f == "build.rs")
{
continue;
}
if path.extension().is_some_and(|ext| ext == "rs") {
let content = fs::read_to_string(path)?;
let file_path = path.to_path_buf();
self.analyze_file(FileAnalysisContext {
content: content.trim().to_string(),
file_path: &file_path,
use_regex: &use_regex,
extern_regex: &extern_regex,
crate_refs: &mut crate_refs,
})?;
}
}
crate_refs.retain(|name, _| {
!name.ends_with("_test")
&& !name.ends_with("_tests")
&& name != "test"
&& name != "tempfile"
&& !name.starts_with("crate")
});
if self.debug {
println!("\nFinal crate references:");
for (name, crate_ref) in &crate_refs {
println!("- {} (used in {} files)", name, crate_ref.usage_count());
if crate_ref.is_path_dependency {
println!(
" Path dependency: {}",
crate_ref.path.as_ref().unwrap_or(&"unknown".to_string())
);
}
if let Some(publish) = crate_ref.publish {
println!(" Publish: {}", publish);
}
println!(" Used in:");
for path in &crate_ref.used_in {
println!(" - {:?}", path);
}
}
}
Ok(crate_refs)
}
fn load_existing_dependencies(
&self,
crate_refs: &mut HashMap<String, CrateReference>,
) -> Result<()> {
let cargo_toml_path = self.project_root.join("Cargo.toml");
if !cargo_toml_path.exists() {
return Ok(());
}
if self.debug {
println!("Loading dependencies from {:?}", cargo_toml_path);
}
let content = fs::read_to_string(&cargo_toml_path)
.with_context(|| format!("Failed to read Cargo.toml at {:?}", cargo_toml_path))?;
let doc = content
.parse::<DocumentMut>()
.with_context(|| format!("Failed to parse Cargo.toml at {:?}", cargo_toml_path))?;
let publish = if let Some(package) = doc.get("package") {
if let Some(publish_value) = package.get("publish") {
publish_value.as_bool()
} else {
None
}
} else {
None
};
if self.debug {
println!("Package publish setting: {:?}", publish);
}
if let Some(dependencies) = doc.get("dependencies").and_then(|d| d.as_table()) {
for (name, value) in dependencies.iter() {
let crate_name = name.to_string();
if self.debug {
println!("Found dependency: {}", crate_name);
println!("Dependency value type: {:?}", value);
}
if crate_refs.contains_key(&crate_name) {
continue;
}
match value {
Item::Table(table) => {
if self.debug {
println!("Dependency {} is a table: {:?}", crate_name, table);
}
if let Some(path_value) = table.get("path") {
if self.debug {
println!("Path value for {}: {:?}", crate_name, path_value);
}
if let Some(path_str) = path_value.as_str() {
let mut crate_ref = CrateReference::with_path(
crate_name.clone(),
path_str.to_string(),
);
if let Some(publish_value) = publish {
crate_ref.set_publish(publish_value);
}
if self.debug {
println!(
"Adding path dependency: {} at {}",
crate_name, path_str
);
println!("With publish setting: {:?}", crate_ref.publish);
}
crate_refs.insert(crate_name, crate_ref);
}
}
}
Item::Value(val) if val.is_inline_table() => {
if self.debug {
println!("Dependency {} is an inline table: {:?}", crate_name, val);
}
if let Some(inline_table) = val.as_inline_table() {
if let Some(path_value) = inline_table.get("path") {
if self.debug {
println!("Path value for {}: {:?}", crate_name, path_value);
}
if let Some(path_str) = path_value.as_str() {
let mut crate_ref = CrateReference::with_path(
crate_name.clone(),
path_str.to_string(),
);
if let Some(publish_value) = publish {
crate_ref.set_publish(publish_value);
}
if self.debug {
println!(
"Adding path dependency (inline): {} at {}",
crate_name, path_str
);
println!("With publish setting: {:?}", crate_ref.publish);
}
crate_refs.insert(crate_name, crate_ref);
}
}
}
}
_ => {
if self.debug {
println!("Skipping regular dependency: {}", crate_name);
}
}
}
}
} else if self.debug {
println!("No dependencies section found in Cargo.toml");
}
Ok(())
}
fn analyze_file(&self, ctx: FileAnalysisContext) -> Result<()> {
let FileAnalysisContext {
content,
file_path,
use_regex: _,
extern_regex,
crate_refs,
} = ctx;
let mut current_line_num = 0;
let lines: Vec<&str> = content.lines().collect();
while current_line_num < lines.len() {
let line = lines[current_line_num].trim();
current_line_num += 1;
if line.is_empty() {
continue;
}
if line.starts_with("//") || line.starts_with("/*") {
continue;
}
if line.starts_with("use") {
let mut use_statement = line.to_string();
let mut brace_count = line.chars().filter(|&c| c == '{').count()
- line.chars().filter(|&c| c == '}').count();
while brace_count > 0 && current_line_num < lines.len() {
let next_line = lines[current_line_num].trim();
current_line_num += 1;
use_statement.push('\n');
use_statement.push_str(next_line);
brace_count += next_line.chars().filter(|&c| c == '{').count();
brace_count -= next_line.chars().filter(|&c| c == '}').count();
}
self.extract_crates_from_use(&use_statement, crate_refs)?;
continue;
}
if let Some(cap) = extern_regex.captures(line) {
let crate_name = cap[1].to_string();
if !is_std_crate(&crate_name) {
crate_refs
.entry(crate_name.clone())
.or_insert_with(|| CrateReference::new(crate_name))
.add_usage(file_path.clone());
}
}
}
Ok(())
}
fn extract_crates_from_use(
&self,
use_statement: &str,
crate_refs: &mut HashMap<String, CrateReference>,
) -> Result<()> {
let clean_use = self.remove_comments(use_statement);
if self.debug {
println!("Cleaned use statement: {}", clean_use);
}
let statement = clean_use.trim_start_matches("use").trim();
if !statement.starts_with('{') && statement.contains("::") {
let parts: Vec<&str> = statement.split("::").collect();
if !parts.is_empty() {
let crate_name = parts[0].trim_end_matches(':').trim();
self.add_crate_if_valid(crate_name, crate_refs);
}
}
else if !statement.starts_with('{') && statement.contains("::") && statement.contains('{')
{
let parts: Vec<&str> = statement.split("::").collect();
if !parts.is_empty() {
let crate_name = parts[0].trim();
self.add_crate_if_valid(crate_name, crate_refs);
}
}
else if statement.starts_with('{') {
let content = &statement[1..statement.rfind('}').unwrap_or(statement.len())];
for item in content.split(',') {
let item = item.trim();
if item.is_empty() {
continue;
}
if item.contains("::") {
let parts: Vec<&str> = item.split("::").collect();
if !parts.is_empty() {
let crate_name = parts[0].trim();
self.add_crate_if_valid(crate_name, crate_refs);
}
}
else {
let crate_name = item.trim();
self.add_crate_if_valid(crate_name, crate_refs);
}
}
}
else {
let crate_name = statement.trim_end_matches(';').trim();
self.add_crate_if_valid(crate_name, crate_refs);
}
Ok(())
}
fn add_crate_if_valid(
&self,
crate_name: &str,
crate_refs: &mut HashMap<String, CrateReference>,
) {
let clean_name = crate_name.trim().trim_end_matches(['}', '\n', '\r', ':']);
if !clean_name.is_empty()
&& !is_std_crate(clean_name)
&& clean_name != "crate"
&& clean_name != "self"
&& clean_name != "super"
{
if self.debug {
println!("Found crate: {}", clean_name);
}
crate_refs
.entry(clean_name.to_string())
.or_insert_with(|| CrateReference::new(clean_name.to_string()))
.add_usage(PathBuf::from(""));
}
}
fn remove_comments(&self, code: &str) -> String {
let mut clean_code = String::new();
let mut in_line_comment = false;
let mut in_block_comment = false;
let mut i = 0;
let chars: Vec<char> = code.chars().collect();
while i < chars.len() {
if in_line_comment {
if chars[i] == '\n' {
in_line_comment = false;
clean_code.push('\n');
}
i += 1;
continue;
}
if in_block_comment {
if i + 1 < chars.len() && chars[i] == '*' && chars[i + 1] == '/' {
in_block_comment = false;
i += 2;
} else {
i += 1;
}
continue;
}
if i + 1 < chars.len() && chars[i] == '/' && chars[i + 1] == '/' {
in_line_comment = true;
i += 2;
continue;
}
if i + 1 < chars.len() && chars[i] == '/' && chars[i + 1] == '*' {
in_block_comment = true;
i += 2;
continue;
}
clean_code.push(chars[i]);
i += 1;
}
clean_code
}
}
struct FileAnalysisContext<'a> {
content: String,
file_path: &'a PathBuf,
#[allow(dead_code)]
use_regex: &'a Regex,
extern_regex: &'a Regex,
crate_refs: &'a mut HashMap<String, CrateReference>,
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs::File;
use std::io::Write;
use tempfile::TempDir;
fn create_test_file(dir: &TempDir, name: &str, content: &str) -> Result<PathBuf> {
let path = dir.path().join(name);
let mut file = File::create(&path)?;
writeln!(file, "{}", content.trim())?;
Ok(path)
}
#[test]
fn test_analyze_dependencies() -> Result<()> {
let temp_dir = TempDir::new()?;
let main_rs = create_test_file(
&temp_dir,
"main.rs",
r#"use serde::Serialize;
use tokio::runtime::Runtime;
use anyhow::Result;
use std::fs;"#,
)?;
let lib_rs = create_test_file(
&temp_dir,
"lib.rs",
r#"use serde::{Deserialize, Serialize};
use regex::Regex;
extern crate serde;"#,
)?;
println!("\nTest files created:");
println!("main.rs content:\n{}", fs::read_to_string(&main_rs)?);
println!("lib.rs content:\n{}", fs::read_to_string(&lib_rs)?);
println!("\nStarting analysis...\n");
let analyzer = DependencyAnalyzer::new(temp_dir.path().to_path_buf());
let crate_refs = analyzer.analyze_dependencies()?;
println!("\nAnalysis complete. Found crates:");
for (name, crate_ref) in &crate_refs {
println!("- {} (used in {} files)", name, crate_ref.usage_count());
println!(" Used in:");
for path in &crate_ref.used_in {
if let Ok(relative) = path.strip_prefix(temp_dir.path()) {
println!(" - {}", relative.display());
}
}
}
assert!(
crate_refs.contains_key("serde"),
"serde dependency not found"
);
assert!(
crate_refs.contains_key("tokio"),
"tokio dependency not found"
);
assert!(
crate_refs.contains_key("anyhow"),
"anyhow dependency not found"
);
assert!(
crate_refs.contains_key("regex"),
"regex dependency not found"
);
let serde_ref = crate_refs.get("serde").unwrap();
assert_eq!(
serde_ref.usage_count(),
2,
"serde should be used in two files"
);
Ok(())
}
#[test]
fn test_load_existing_dependencies() -> Result<()> {
let temp_dir = TempDir::new()?;
let cargo_toml_content = r#"
[package]
name = "test-package"
version = "0.1.0"
edition = "2021"
publish = false
[dependencies]
serde = "1.0"
internal-crate = { path = "../internal-crate" }
"#;
let cargo_toml_path = temp_dir.path().join("Cargo.toml");
let mut file = File::create(&cargo_toml_path)?;
writeln!(file, "{}", cargo_toml_content)?;
fs::create_dir_all(temp_dir.path().join("src"))?;
let main_rs_path = temp_dir.path().join("src/main.rs");
let main_rs_content = r#"
fn main() {
println!("Hello, world!");
}
"#;
let mut file = File::create(main_rs_path)?;
writeln!(file, "{}", main_rs_content)?;
let analyzer = DependencyAnalyzer::with_debug(temp_dir.path().to_path_buf(), true);
let crate_refs = analyzer.analyze_dependencies()?;
assert!(
crate_refs.contains_key("internal-crate"),
"internal-crate dependency not found"
);
if let Some(internal_crate) = crate_refs.get("internal-crate") {
assert!(
internal_crate.is_path_dependency,
"internal-crate should be a path dependency"
);
assert_eq!(
internal_crate.path,
Some("../internal-crate".to_string()),
"internal-crate path should be ../internal-crate"
);
assert_eq!(
internal_crate.publish,
Some(false),
"publish should be false"
);
}
Ok(())
}
#[test]
fn test_analyze_file() -> Result<()> {
let temp_dir = TempDir::new()?;
let analyzer = DependencyAnalyzer::new(temp_dir.path().to_path_buf());
let file_path = temp_dir.path().join("test.rs");
let content = r#"use serde::Serialize;
use tokio::runtime::Runtime;
extern crate anyhow;
use std::fs;"#;
println!("\nTest file content:\n{}", content);
println!("\nStarting analysis...\n");
let mut crate_refs = HashMap::new();
let use_regex = Regex::new(r"^\s*use\s+([a-zA-Z_][a-zA-Z0-9_]*(?:::[a-zA-Z0-9_]*)*)")?;
let extern_regex = Regex::new(r"^\s*extern\s+crate\s+([a-zA-Z_][a-zA-Z0-9_]*)")?;
analyzer.analyze_file(FileAnalysisContext {
content: content.trim().to_string(),
file_path: &file_path,
use_regex: &use_regex,
extern_regex: &extern_regex,
crate_refs: &mut crate_refs,
})?;
println!("\nAnalysis complete. Found crates:");
for (name, crate_ref) in &crate_refs {
println!("- {} (used in {} files)", name, crate_ref.usage_count());
}
assert!(
crate_refs.contains_key("serde"),
"serde dependency not found"
);
assert!(
crate_refs.contains_key("tokio"),
"tokio dependency not found"
);
assert!(
crate_refs.contains_key("anyhow"),
"anyhow dependency not found"
);
Ok(())
}
#[test]
fn test_complex_use_statements() -> Result<()> {
let temp_dir = TempDir::new()?;
let analyzer = DependencyAnalyzer::new(temp_dir.path().to_path_buf());
let file_path = temp_dir.path().join("complex_use.rs");
let content = r#"
// Simple use statement
use serde::Serialize;
// Braced use statement
use {
tokio::runtime::Runtime,
reqwest::Client,
anyhow::Result
};
// Braced use with comments
use {
//serde_json::Value,
regex::Regex,
/* rand::Rng,
chrono::DateTime */
walkdir::WalkDir
};
// Wildcard import
use clap::*;
// Mixed imports
use {
std::fs,
std::path::PathBuf,
log::*
};
"#;
println!("\nComplex test file content:\n{}", content);
println!("\nStarting analysis...\n");
let mut crate_refs = HashMap::new();
let use_regex = Regex::new(r"^\s*use\s+([a-zA-Z_][a-zA-Z0-9_]*(?:::[a-zA-Z0-9_]*)*)")?;
let extern_regex = Regex::new(r"^\s*extern\s+crate\s+([a-zA-Z_][a-zA-Z0-9_]*)")?;
analyzer.analyze_file(FileAnalysisContext {
content: content.to_string(),
file_path: &file_path,
use_regex: &use_regex,
extern_regex: &extern_regex,
crate_refs: &mut crate_refs,
})?;
println!("\nAnalysis complete. Found crates:");
for (name, crate_ref) in &crate_refs {
println!("- {}: {:?}", name, crate_ref);
}
assert!(crate_refs.contains_key("serde"), "serde should be detected");
assert!(crate_refs.contains_key("tokio"), "tokio should be detected");
assert!(
crate_refs.contains_key("reqwest"),
"reqwest should be detected"
);
assert!(
crate_refs.contains_key("anyhow"),
"anyhow should be detected"
);
assert!(crate_refs.contains_key("regex"), "regex should be detected");
assert!(
crate_refs.contains_key("walkdir"),
"walkdir should be detected"
);
assert!(crate_refs.contains_key("clap"), "clap should be detected");
assert!(crate_refs.contains_key("log"), "log should be detected");
assert!(
!crate_refs.contains_key("serde_json"),
"serde_json should not be detected (commented out)"
);
assert!(
!crate_refs.contains_key("rand"),
"rand should not be detected (commented out)"
);
assert!(
!crate_refs.contains_key("chrono"),
"chrono should not be detected (commented out)"
);
Ok(())
}
#[test]
fn test_nested_and_complex_use_statements() -> Result<()> {
let temp_dir = TempDir::new()?;
let analyzer = DependencyAnalyzer::with_debug(temp_dir.path().to_path_buf(), true);
let file_path = temp_dir.path().join("nested_use.rs");
let content = r#"
// Nested use with multiple levels
use {
serde::{Serialize, Deserialize},
tokio::{
runtime::Runtime,
sync::{Mutex, RwLock}
},
// Commented section
/*
rand::{
Rng,
distributions::Uniform
},
*/
reqwest::{Client, Response}
};
// Multiple lines with inline comments
use clap::{ // Command line parser
Command, // For creating commands
Arg, // For defining arguments
ArgMatches // For matching arguments
};
// Mixed with standard library
use {
std::{
fs::File,
io::{Read, Write},
path::{Path, PathBuf}
},
log::{debug, info, warn, error}
};
"#;
println!("\nNested test file content:\n{}", content);
println!("\nStarting analysis...\n");
let mut crate_refs = HashMap::new();
let use_regex = Regex::new(r"^\s*use\s+([a-zA-Z_][a-zA-Z0-9_]*(?:::[a-zA-Z0-9_]*)*)")?;
let extern_regex = Regex::new(r"^\s*extern\s+crate\s+([a-zA-Z_][a-zA-Z0-9_]*)")?;
analyzer.analyze_file(FileAnalysisContext {
content: content.to_string(),
file_path: &file_path,
use_regex: &use_regex,
extern_regex: &extern_regex,
crate_refs: &mut crate_refs,
})?;
println!("\nAnalysis complete. Found crates:");
for (name, crate_ref) in &crate_refs {
println!("- {}: {:?}", name, crate_ref);
}
assert!(crate_refs.contains_key("serde"), "serde should be detected");
assert!(
crate_refs.contains_key("reqwest"),
"reqwest should be detected"
);
assert!(crate_refs.contains_key("clap"), "clap should be detected");
assert!(crate_refs.contains_key("log"), "log should be detected");
if !crate_refs.contains_key("tokio") {
println!(
"NOTE: tokio was not detected. This is a known limitation of the current implementation."
);
println!("The current implementation does not fully support deeply nested imports.");
println!("This is acceptable for now, as the main goal is to detect top-level crates.");
}
assert!(
!crate_refs.contains_key("rand"),
"rand should not be detected (commented out)"
);
Ok(())
}
}