#![allow(dead_code)]
use anyhow::{Context, Result};
use rayon::prelude::*;
use std::fs;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
/// Location and membership of the Cargo workspace being analyzed.
#[derive(Debug, Clone)]
pub struct WorkspaceInfo {
    /// Directory containing the top-level `Cargo.toml`.
    pub root: PathBuf,
    /// Member crate directories; for a single crate this is just `[root]`.
    pub members: Vec<PathBuf>,
    /// True when the root manifest has a `[workspace]` table but no `[package]`.
    pub is_virtual: bool,
}
/// Per-crate analysis results: name, location, and the `.rs` files found under `src/`.
#[derive(Debug, Clone)]
pub struct CrateInfo {
    /// `[package].name` from the crate's manifest, or "unknown" if absent.
    pub name: String,
    /// Directory containing the crate's `Cargo.toml`.
    pub path: PathBuf,
    /// All Rust source files discovered under the crate's `src/` directory.
    pub source_files: Vec<SourceFileInfo>,
    /// Sum of `line_count` over `source_files`.
    pub total_lines: usize,
}
/// A single `.rs` file together with its size measurement.
#[derive(Debug, Clone)]
pub struct SourceFileInfo {
    /// Absolute or workspace-relative path of the file (as produced by walkdir).
    pub path: PathBuf,
    /// Number of lines in the file.
    pub line_count: usize,
    /// True when `line_count` exceeds the analyzer's `target_lines` threshold.
    pub exceeds_limit: bool,
}
/// Full result of a workspace scan, produced by [`WorkspaceAnalyzer::analyze`].
#[derive(Debug)]
pub struct WorkspaceAnalysis {
    /// The workspace layout that was detected.
    pub workspace: WorkspaceInfo,
    /// One entry per workspace member crate.
    pub crates: Vec<CrateInfo>,
    /// Files whose line count exceeds the configured limit, across all crates.
    pub files_to_refactor: Vec<SourceFileInfo>,
    /// Sum of all crates' `total_lines` (equals `stats.total_lines`).
    pub total_lines: usize,
    /// Aggregate statistics over every source file.
    pub stats: WorkspaceStats,
}
/// Aggregate file statistics accumulated while analyzing a workspace.
#[derive(Debug, Default)]
pub struct WorkspaceStats {
    /// Total number of `.rs` files scanned.
    pub total_files: usize,
    /// Number of files exceeding the line limit.
    pub large_files: usize,
    /// Total line count over all scanned files.
    pub total_lines: usize,
    /// Integer average (`total_lines / total_files`); 0 when no files were found.
    pub avg_lines_per_file: usize,
    /// Path and line count of the single longest file, if any file was scanned.
    pub largest_file: Option<(PathBuf, usize)>,
}
/// Scans a Cargo project or workspace and flags source files that exceed a line limit.
pub struct WorkspaceAnalyzer {
    /// Directory expected to contain the top-level `Cargo.toml`.
    root: PathBuf,
    /// Line-count threshold above which a file is marked for refactoring.
    target_lines: usize,
}
impl WorkspaceAnalyzer {
/// Create an analyzer rooted at `root` that flags files longer than `target_lines`.
pub fn new<P: AsRef<Path>>(root: P, target_lines: usize) -> Self {
    let root = root.as_ref().to_path_buf();
    Self { root, target_lines }
}
/// Detect the workspace layout, analyze every member crate, and aggregate
/// per-file statistics into a single [`WorkspaceAnalysis`].
pub fn analyze(&self) -> Result<WorkspaceAnalysis> {
    let workspace = self.detect_workspace()?;
    let crates = self.analyze_crates(&workspace)?;

    let mut stats = WorkspaceStats::default();
    let mut files_to_refactor = Vec::new();
    let mut total_lines = 0;

    for crate_info in &crates {
        total_lines += crate_info.total_lines;
        stats.total_files += crate_info.source_files.len();
        for file in &crate_info.source_files {
            stats.total_lines += file.line_count;
            if file.exceeds_limit {
                stats.large_files += 1;
                files_to_refactor.push(file.clone());
            }
            // Track the single longest file seen so far.
            let is_new_max = match &stats.largest_file {
                Some((_, current_max)) => file.line_count > *current_max,
                None => true,
            };
            if is_new_max {
                stats.largest_file = Some((file.path.clone(), file.line_count));
            }
        }
    }

    // checked_div avoids a divide-by-zero panic for an empty workspace.
    stats.avg_lines_per_file = stats
        .total_lines
        .checked_div(stats.total_files)
        .unwrap_or(0);

    Ok(WorkspaceAnalysis {
        workspace,
        crates,
        files_to_refactor,
        total_lines,
        stats,
    })
}
/// Read the root `Cargo.toml` and work out whether this is a single crate
/// or a (possibly virtual) workspace, expanding the member list if present.
fn detect_workspace(&self) -> Result<WorkspaceInfo> {
    let cargo_toml = self.root.join("Cargo.toml");
    if !cargo_toml.exists() {
        anyhow::bail!(
            "No Cargo.toml found in {:?}\nPlease run from a Cargo project or workspace root.",
            self.root
        );
    }

    let content = fs::read_to_string(&cargo_toml).context("Failed to read Cargo.toml")?;
    let manifest: toml::Value =
        toml::from_str(&content).context("Failed to parse Cargo.toml")?;

    let workspace_table = manifest.get("workspace");
    // No [workspace] table, or one without `members`, means the root itself
    // is the only member.
    let members = match workspace_table.and_then(|w| w.get("members")) {
        Some(members) => self.expand_workspace_members(members)?,
        None => vec![self.root.clone()],
    };

    // A "virtual" manifest declares [workspace] but contains no [package].
    let is_virtual = workspace_table.is_some() && manifest.get("package").is_none();

    Ok(WorkspaceInfo {
        root: self.root.clone(),
        members,
        is_virtual,
    })
}
/// Turn the `workspace.members` TOML array into concrete crate directories,
/// expanding glob entries and dropping paths that do not exist.
/// Non-array values and non-string entries are silently ignored.
fn expand_workspace_members(&self, members: &toml::Value) -> Result<Vec<PathBuf>> {
    let mut result = Vec::new();
    if let Some(entries) = members.as_array() {
        for entry in entries {
            let pattern = match entry.as_str() {
                Some(p) => p,
                None => continue,
            };
            if pattern.contains('*') {
                result.extend(self.expand_glob_pattern(pattern)?);
            } else {
                let member_path = self.root.join(pattern);
                if member_path.exists() {
                    result.push(member_path);
                }
            }
        }
    }
    Ok(result)
}
/// Expand a glob-style workspace member pattern relative to the root.
///
/// Only the common `dir/*` form is handled properly: every immediate
/// subdirectory of `dir` containing a `Cargo.toml` becomes a member.
/// Results for that form are sorted, because `fs::read_dir` yields entries
/// in a platform-dependent order and an unsorted list would make member
/// (and therefore report) order nondeterministic across runs.
///
/// Any other pattern falls back to stripping `*` characters and testing the
/// remaining literal path — a rough approximation, not real glob matching
/// (e.g. `crates/*/tools` is not expanded).
fn expand_glob_pattern(&self, pattern: &str) -> Result<Vec<PathBuf>> {
    let mut result = Vec::new();
    let parts: Vec<&str> = pattern.split('/').collect();
    if parts.len() == 2 && parts[1] == "*" {
        let parent = self.root.join(parts[0]);
        if parent.is_dir() {
            for entry in fs::read_dir(&parent)? {
                let entry = entry?;
                let path = entry.path();
                // A member must be a directory carrying its own manifest.
                if path.is_dir() && path.join("Cargo.toml").exists() {
                    result.push(path);
                }
            }
            // Fix: make expansion order deterministic.
            result.sort();
        }
    } else {
        // Best-effort fallback for unsupported patterns.
        let path = self.root.join(pattern.replace('*', ""));
        if path.exists() {
            result.push(path);
        }
    }
    Ok(result)
}
/// Analyze every workspace member in parallel; fails fast on the first
/// crate whose manifest cannot be read or parsed.
fn analyze_crates(&self, workspace: &WorkspaceInfo) -> Result<Vec<CrateInfo>> {
    let members = &workspace.members;
    members
        .par_iter()
        .map(|path| self.analyze_crate(path))
        .collect()
}
fn analyze_crate(&self, crate_path: &Path) -> Result<CrateInfo> {
let cargo_toml = crate_path.join("Cargo.toml");
let content =
fs::read_to_string(&cargo_toml).context(format!("Failed to read {:?}", cargo_toml))?;
let toml_value: toml::Value = toml::from_str(&content)?;
let name = toml_value
.get("package")
.and_then(|p| p.get("name"))
.and_then(|n| n.as_str())
.unwrap_or("unknown")
.to_string();
let src_dir = crate_path.join("src");
let source_files = self.find_source_files(&src_dir)?;
let total_lines: usize = source_files.iter().map(|f| f.line_count).sum();
Ok(CrateInfo {
name,
path: crate_path.to_path_buf(),
source_files,
total_lines,
})
}
/// Recursively collect every regular `.rs` file under `dir` and count its
/// lines in parallel. Returns an empty list when `dir` does not exist
/// (e.g. a manifest-only crate with no `src/`).
fn find_source_files(&self, dir: &Path) -> Result<Vec<SourceFileInfo>> {
    if !dir.exists() {
        return Ok(Vec::new());
    }
    let files: Vec<SourceFileInfo> = WalkDir::new(dir)
        .into_iter()
        .filter_map(|e| e.ok())
        .filter(|e| {
            // Fix: require a regular file. The extension check alone would
            // accept a *directory* named e.g. `foo.rs`, which would then be
            // silently counted as a 0-line source file below.
            e.file_type().is_file()
                && e.path().extension().map(|ext| ext == "rs").unwrap_or(false)
        })
        .par_bridge()
        .map(|entry| {
            let path = entry.path().to_path_buf();
            // Best-effort: unreadable files count as empty rather than
            // aborting the whole scan.
            let content = fs::read_to_string(&path).unwrap_or_default();
            let line_count = content.lines().count();
            let exceeds_limit = line_count > self.target_lines;
            SourceFileInfo {
                path,
                line_count,
                exceeds_limit,
            }
        })
        .collect();
    Ok(files)
}
/// Print a human-readable report of `analysis` to stdout: workspace layout,
/// aggregate statistics, and the list of files exceeding the line limit.
pub fn print_summary(&self, analysis: &WorkspaceAnalysis) {
    let stats = &analysis.stats;

    println!("\n📦 Workspace Analysis");
    println!("{}", "=".repeat(60));
    println!("Root: {:?}", analysis.workspace.root);

    let workspace_kind = if analysis.workspace.is_virtual {
        "Virtual workspace"
    } else {
        "Single crate or workspace"
    };
    println!("Type: {}", workspace_kind);
    println!("Crates: {}", analysis.crates.len());

    println!("\n📊 Statistics:");
    println!(" Total source files: {}", stats.total_files);
    println!(" Total lines of code: {}", stats.total_lines);
    println!(" Average lines per file: {}", stats.avg_lines_per_file);
    if let Some((path, lines)) = &stats.largest_file {
        println!(" Largest file: {:?} ({} lines)", path, lines);
    }

    if stats.large_files == 0 {
        println!("\n✅ No files exceed the {} line limit", self.target_lines);
    } else {
        println!(
            "\n⚠️ Files exceeding {} lines: {}",
            self.target_lines, stats.large_files
        );
        for file in &analysis.files_to_refactor {
            println!(" 📄 {:?} ({} lines)", file.path, file.line_count);
        }
    }
}
}
/// Thin wrapper around rayon for configuring the thread pool and running a
/// closure over a batch of files in parallel.
pub struct ParallelProcessor {
    /// Requested worker-thread count; 0 leaves rayon's default configuration.
    num_threads: usize,
}
impl ParallelProcessor {
pub fn new(num_threads: usize) -> Self {
Self { num_threads }
}
/// Install a global rayon pool sized to `num_threads`; a count of 0 keeps
/// rayon's default. Always returns `Ok(())`.
pub fn configure_pool(&self) -> Result<()> {
    if self.num_threads == 0 {
        return Ok(());
    }
    // Errors from build_global (e.g. a global pool was already initialized,
    // per rayon's documentation) are deliberately ignored: rayon then keeps
    // whatever configuration is already in place.
    let _ = rayon::ThreadPoolBuilder::new()
        .num_threads(self.num_threads)
        .build_global();
    Ok(())
}
/// Run `processor` over every path in parallel, returning one `Result` per
/// input file. Output order matches the input order of `files` (rayon's
/// indexed `collect` preserves ordering).
pub fn process_files<F, T>(&self, files: Vec<PathBuf>, processor: F) -> Vec<Result<T>>
where
    F: Fn(&Path) -> Result<T> + Sync + Send,
    T: Send,
{
    files
        .par_iter()
        .map(|p| processor(p.as_path()))
        .collect()
}
}
/// Outcome summary for a batch processing run.
/// NOTE(review): not constructed anywhere in this chunk (covered by the
/// file-level `#![allow(dead_code)]`) — presumably filled in by a caller
/// elsewhere; verify before relying on its semantics.
#[derive(Debug)]
pub struct ProcessingResult {
    /// Files processed successfully.
    pub succeeded: Vec<PathBuf>,
    /// Files that failed, paired with an error description.
    pub failed: Vec<(PathBuf, String)>,
    /// Wall-clock duration of the run in milliseconds.
    pub elapsed_ms: u64,
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// A lone crate (no `[workspace]` table) is treated as a one-member
    /// workspace whose single crate is the root itself.
    #[test]
    fn test_workspace_analyzer_single_crate() {
        let temp_dir = TempDir::new().unwrap();
        fs::write(
            temp_dir.path().join("Cargo.toml"),
            r#"
[package]
name = "test-crate"
version = "0.1.0"
edition = "2021"
"#,
        )
        .unwrap();
        let src_dir = temp_dir.path().join("src");
        fs::create_dir_all(&src_dir).unwrap();
        fs::write(
            src_dir.join("main.rs"),
            "fn main() {\n println!(\"Hello\");\n}\n",
        )
        .unwrap();
        let analyzer = WorkspaceAnalyzer::new(temp_dir.path(), 100);
        let analysis = analyzer.analyze().unwrap();
        assert_eq!(analysis.crates.len(), 1);
        assert_eq!(analysis.crates[0].name, "test-crate");
        assert_eq!(analysis.stats.total_files, 1);
    }

    /// A virtual workspace with explicit `members` should yield one
    /// CrateInfo per member and be flagged as virtual.
    #[test]
    fn test_workspace_analyzer_with_workspace() {
        let temp_dir = TempDir::new().unwrap();
        // Root manifest: [workspace] only, no [package] -> virtual.
        fs::write(
            temp_dir.path().join("Cargo.toml"),
            r#"
[workspace]
members = ["crate_a", "crate_b"]
"#,
        )
        .unwrap();
        let crate_a = temp_dir.path().join("crate_a");
        fs::create_dir_all(&crate_a).unwrap();
        fs::write(
            crate_a.join("Cargo.toml"),
            r#"
[package]
name = "crate-a"
version = "0.1.0"
"#,
        )
        .unwrap();
        let src_a = crate_a.join("src");
        fs::create_dir_all(&src_a).unwrap();
        fs::write(src_a.join("lib.rs"), "pub fn foo() {}\n").unwrap();
        let crate_b = temp_dir.path().join("crate_b");
        fs::create_dir_all(&crate_b).unwrap();
        fs::write(
            crate_b.join("Cargo.toml"),
            r#"
[package]
name = "crate-b"
version = "0.1.0"
"#,
        )
        .unwrap();
        let src_b = crate_b.join("src");
        fs::create_dir_all(&src_b).unwrap();
        fs::write(src_b.join("lib.rs"), "pub fn bar() {}\n").unwrap();
        let analyzer = WorkspaceAnalyzer::new(temp_dir.path(), 100);
        let analysis = analyzer.analyze().unwrap();
        assert_eq!(analysis.crates.len(), 2);
        assert!(analysis.workspace.is_virtual);
    }

    /// A 200-line file against a 100-line limit must be reported exactly
    /// once, both in the counter and in the refactor list.
    #[test]
    fn test_large_file_detection() {
        let temp_dir = TempDir::new().unwrap();
        fs::write(
            temp_dir.path().join("Cargo.toml"),
            r#"
[package]
name = "test-crate"
version = "0.1.0"
"#,
        )
        .unwrap();
        let src_dir = temp_dir.path().join("src");
        fs::create_dir_all(&src_dir).unwrap();
        // 200 one-line functions -> 200 lines, over the 100-line limit.
        let large_content = (0..200)
            .map(|i| format!("fn func_{}() {{}}\n", i))
            .collect::<String>();
        fs::write(src_dir.join("main.rs"), &large_content).unwrap();
        let analyzer = WorkspaceAnalyzer::new(temp_dir.path(), 100);
        let analysis = analyzer.analyze().unwrap();
        assert_eq!(analysis.stats.large_files, 1);
        assert_eq!(analysis.files_to_refactor.len(), 1);
    }

    /// process_files returns one result per input path; the closure never
    /// touches the filesystem, so nonexistent paths are fine here.
    #[test]
    fn test_parallel_processor() {
        let processor = ParallelProcessor::new(4);
        let files = vec![PathBuf::from("/tmp/a.rs"), PathBuf::from("/tmp/b.rs")];
        let results: Vec<Result<String>> =
            processor.process_files(files.clone(), |path| Ok(path.to_string_lossy().to_string()));
        assert_eq!(results.len(), 2);
    }
}