use anyhow::Result;
use serde::Serialize;
use std::collections::BTreeMap;
use std::fs;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
/// Per-file record collected by `scan` for every file that is included in the report.
#[derive(Debug, Serialize, Clone)]
pub struct FileSummary {
/// Path relative to the scan root when the root prefix can be stripped,
/// otherwise the path as walked; converted lossily to UTF-8.
pub path: String,
/// Language label returned by `detect_language` (e.g. "Rust", "Other").
pub language: String,
/// Number of lines; 0 when the language is "Other" or the file could not be read as text.
pub lines: usize,
/// File size in bytes as reported by the filesystem metadata.
pub bytes: u64,
}
/// Running totals for all scanned files that share one language label.
#[derive(Debug, Serialize, Clone, Default)]
pub struct LanguageStats {
/// Number of files attributed to the language.
pub files: usize,
/// Sum of the line counts of those files.
pub lines: usize,
/// Sum of the sizes of those files, in bytes.
pub bytes: u64,
}
/// Aggregated result of a `scan` over one directory tree.
#[derive(Debug, Serialize)]
pub struct ScanReport {
/// The scan root as passed to `scan`, converted lossily to UTF-8.
pub root: String,
/// Number of files included in the report.
pub total_files: usize,
/// Sum of the line counts of all included files.
pub total_lines: usize,
/// Sum of the sizes of all included files, in bytes.
pub total_bytes: u64,
/// Per-language totals keyed by language label (ordered by label).
pub languages: BTreeMap<String, LanguageStats>,
/// At most ten files, largest first.
pub top_files_by_size: Vec<FileSummary>,
/// Sorted relative paths of files whose name appears in `ENTRY_POINT_NAMES`.
pub entry_points: Vec<String>,
/// Sorted relative paths of files accepted by `is_config_file`.
pub config_files: Vec<String>,
}
/// Directory names pruned by the walkers in `scan` and `build_tree`.
/// Names are compared exactly (case-sensitive) against a directory's own name,
/// so a match at any depth removes that directory and everything below it.
const SKIP_DIRS: &[&str] = &[
"node_modules",
"target",
"dist",
"build",
".next",
".nuxt",
".turbo",
".cache",
".venv",
"venv",
"__pycache__",
".pytest_cache",
".ruff_cache",
".git",
".idea",
".vscode",
".expo",
".dart_tool",
"vendor",
"Pods",
"DerivedData",
];
/// File names that `scan` leaves out of the report entirely.
/// Compared exactly (case-sensitive) against the file's own name.
const SKIP_FILES: &[&str] = &[
".DS_Store",
"Cargo.lock",
"package-lock.json",
"pnpm-lock.yaml",
"yarn.lock",
"uv.lock",
];
/// File names that `scan` records as entry points.
/// Compared exactly (case-sensitive) against the file's own name, in any directory.
const ENTRY_POINT_NAMES: &[&str] = &[
"main.rs",
"main.py",
"main.ts",
"main.tsx",
"main.dart",
"main.go",
"index.ts",
"index.tsx",
"index.js",
"index.html",
"app.module.ts",
"App.tsx",
"App.tsx.tmpl",
];
/// Names consulted by `is_config_file` to recognise configuration files.
/// An entry matches a relative path that equals it or ends with it after a `/`;
/// entries may therefore contain a directory part (e.g. "supabase/config.toml").
const CONFIG_FILE_NAMES: &[&str] = &[
"Cargo.toml",
"package.json",
"pyproject.toml",
"pubspec.yaml",
"go.mod",
"tsconfig.json",
"vite.config.ts",
"vite.config.js",
"next.config.js",
"next.config.ts",
"nest-cli.json",
"docker-compose.yml",
"Dockerfile",
"supabase/config.toml",
".env.example",
"devist.toml",
];
/// Walks the tree under `root` and aggregates per-language and per-file statistics.
///
/// The scan is best-effort: entries that cannot be read are skipped silently.
/// Directories named in `SKIP_DIRS` are pruned, and files named in `SKIP_FILES`,
/// files whose name is not valid UTF-8, and files larger than 5 MiB are ignored.
/// The root directory itself is always entered, even when its own name appears
/// in `SKIP_DIRS`.
///
/// # Returns
/// A [`ScanReport`] with totals, per-language statistics, the ten largest files
/// (largest first, ties ordered by path), and the sorted lists of entry-point and
/// configuration files.
///
/// # Errors
/// No failure path currently produces an `Err`; the `Result` return type is part
/// of the existing interface.
pub fn scan(root: &Path) -> Result<ScanReport> {
    // Files above this size are left out of the report entirely.
    const MAX_FILE_BYTES: u64 = 5 * 1024 * 1024;
    // Number of entries kept in `top_files_by_size`.
    const TOP_FILES: usize = 10;

    let mut languages: BTreeMap<String, LanguageStats> = BTreeMap::new();
    let mut all_files: Vec<FileSummary> = Vec::new();
    let mut entry_points: Vec<String> = Vec::new();
    let mut config_files: Vec<String> = Vec::new();
    let mut total_files = 0;
    let mut total_lines = 0;
    let mut total_bytes: u64 = 0;

    let walker = WalkDir::new(root).into_iter().filter_entry(|e| {
        // `filter_entry` is also consulted for the root (depth 0). Without this
        // exemption, scanning a directory that is itself called e.g. `build` or
        // `vendor` would prune the whole walk and produce an empty report.
        if e.depth() == 0 || !e.file_type().is_dir() {
            return true;
        }
        // A name that is not valid UTF-8 cannot equal any `SKIP_DIRS` entry.
        e.file_name()
            .to_str()
            .map_or(true, |name| !SKIP_DIRS.contains(&name))
    });

    // `flatten` drops walk errors (permission denied, vanished entries, ...).
    for entry in walker.flatten() {
        if !entry.file_type().is_file() {
            continue;
        }
        let path = entry.path();
        let file_name = match path.file_name().and_then(|n| n.to_str()) {
            Some(n) => n,
            None => continue,
        };
        if SKIP_FILES.contains(&file_name) {
            continue;
        }

        let bytes = match fs::metadata(path) {
            Ok(m) => m.len(),
            Err(_) => continue,
        };
        if bytes > MAX_FILE_BYTES {
            continue;
        }

        let rel = path.strip_prefix(root).unwrap_or(path).to_path_buf();
        let rel_str = rel.to_string_lossy().into_owned();

        let language = detect_language(path);
        // Unreadable or non-text content counts as zero lines rather than failing the scan.
        let lines = if is_text_language(&language) {
            count_lines(path).unwrap_or(0)
        } else {
            0
        };

        let stats = languages.entry(language.clone()).or_default();
        stats.files += 1;
        stats.lines += lines;
        stats.bytes += bytes;
        total_files += 1;
        total_lines += lines;
        total_bytes += bytes;

        if ENTRY_POINT_NAMES.contains(&file_name) {
            entry_points.push(rel_str.clone());
        }
        if is_config_file(&rel) {
            config_files.push(rel_str.clone());
        }
        all_files.push(FileSummary {
            path: rel_str,
            language,
            lines,
            bytes,
        });
    }

    // Largest first; ties are broken by path so the result does not depend on
    // the order in which the filesystem happens to list directory entries.
    all_files.sort_by(|a, b| b.bytes.cmp(&a.bytes).then_with(|| a.path.cmp(&b.path)));
    all_files.truncate(TOP_FILES);
    entry_points.sort();
    config_files.sort();

    Ok(ScanReport {
        root: root.to_string_lossy().into_owned(),
        total_files,
        total_lines,
        total_bytes,
        languages,
        top_files_by_size: all_files,
        entry_points,
        config_files,
    })
}
/// Reads the whole file at `path` as UTF-8 text and returns how many lines it has.
///
/// # Errors
/// Propagates the I/O error when the file cannot be read or is not valid UTF-8.
fn count_lines(path: &Path) -> Result<usize> {
    let line_total = fs::read_to_string(path).map(|text| text.lines().count())?;
    Ok(line_total)
}
/// Returns a human-readable language label for `path`, or `"Other"` when the
/// file is not recognised.
///
/// Well-known file names (`Dockerfile`, `.env`, `.env.local`, `.env.example`,
/// `.gitignore`) are matched first; otherwise the label is derived from the
/// case-insensitive extension. `*.tmpl` files are labelled by their inner
/// extension (see `detect_tmpl_language`).
fn detect_language(path: &Path) -> String {
    // The name check must come before the extension check: `Path::extension`
    // reports "local" / "example" for `.env.local` / `.env.example`, so an
    // extension-first lookup would classify them as "Other".
    let by_name = match path.file_name().and_then(|n| n.to_str()) {
        Some("Dockerfile") => Some("Dockerfile"),
        Some(".env") | Some(".env.local") | Some(".env.example") => Some("Env"),
        Some(".gitignore") => Some("Gitignore"),
        _ => None,
    };
    if let Some(label) = by_name {
        return label.to_string();
    }

    let ext = path
        .extension()
        .and_then(|e| e.to_str())
        .unwrap_or("")
        .to_lowercase();
    let label = match ext.as_str() {
        "tmpl" => detect_tmpl_language(path),
        other => language_for_extension(other).unwrap_or("Other"),
    };
    label.to_string()
}

/// Labels a `*.tmpl` file by the extension that precedes `.tmpl`
/// (`App.tsx.tmpl` -> `"TypeScript"`), falling back to `"Template"` when there
/// is no inner extension or it is not recognised.
fn detect_tmpl_language(path: &Path) -> &'static str {
    path.file_stem()
        .and_then(|stem| Path::new(stem).extension())
        .and_then(|inner| inner.to_str())
        .and_then(|inner| language_for_extension(&inner.to_lowercase()))
        .unwrap_or("Template")
}

/// Maps a lower-case file extension (without the dot) to its language label.
///
/// This is the single table shared by plain files and templates, so a template
/// of any recognised language gets that language's label.
fn language_for_extension(ext: &str) -> Option<&'static str> {
    let label = match ext {
        "rs" => "Rust",
        "py" => "Python",
        "ts" | "tsx" => "TypeScript",
        "js" | "jsx" | "mjs" | "cjs" => "JavaScript",
        "dart" => "Dart",
        "go" => "Go",
        "java" => "Java",
        "kt" | "kts" => "Kotlin",
        "swift" => "Swift",
        "rb" => "Ruby",
        "php" => "PHP",
        "html" | "htm" => "HTML",
        "css" | "scss" | "sass" | "less" => "CSS",
        "md" => "Markdown",
        "json" => "JSON",
        "yaml" | "yml" => "YAML",
        "toml" => "TOML",
        "sh" | "bash" | "zsh" => "Shell",
        "sql" => "SQL",
        _ => return None,
    };
    Some(label)
}
/// Returns `true` for every language label except the catch-all `"Other"`.
fn is_text_language(lang: &str) -> bool {
    lang != "Other"
}
/// Reports whether the relative path `rel` names one of the `CONFIG_FILE_NAMES`.
///
/// An entry matches when `rel` equals it or ends with it on a path-component
/// boundary, so `Cargo.toml` matches `crates/a/Cargo.toml` but not
/// `xCargo.toml`, and `supabase/config.toml` matches only inside a `supabase`
/// directory.
fn is_config_file(rel: &Path) -> bool {
    // `Path::ends_with` compares whole components using the platform's
    // separators, so this also works where relative paths are rendered with
    // `\` instead of `/`, and it avoids building a temporary string per entry.
    CONFIG_FILE_NAMES.iter().any(|&name| rel.ends_with(name))
}
/// Renders the directory tree under `root` as a flat list of indented lines.
///
/// * `max_depth` - deepest level (relative to `root`) that is listed.
/// * `max_entries` - maximum number of entries listed; when more entries exist,
///   a final `(truncated)` marker line is appended.
///
/// Each entry is indented by its depth and directories carry a trailing `/`.
/// Directories named in `SKIP_DIRS` are pruned, the root itself is never listed
/// (but is always entered, whatever its name), and unreadable entries are
/// skipped silently.
pub fn build_tree(root: &Path, max_depth: usize, max_entries: usize) -> Vec<String> {
    let mut lines = Vec::new();
    let mut count = 0;
    let walker = WalkDir::new(root)
        .max_depth(max_depth)
        .into_iter()
        .filter_entry(|e| {
            // `filter_entry` also sees the root (depth 0); exempt it so that a
            // root directory named e.g. `build` does not yield an empty tree.
            if e.depth() == 0 || !e.file_type().is_dir() {
                return true;
            }
            // A name that is not valid UTF-8 cannot equal any `SKIP_DIRS` entry.
            e.file_name()
                .to_str()
                .map_or(true, |name| !SKIP_DIRS.contains(&name))
        });
    for entry in walker.flatten() {
        let depth = entry.depth();
        // Skip the root before the limit check, so the marker is only emitted
        // when a real entry had to be left out (matters for `max_entries == 0`).
        if depth == 0 {
            continue;
        }
        if count >= max_entries {
            lines.push(" … (truncated)".to_string());
            break;
        }
        let name = entry.file_name().to_string_lossy();
        let prefix = " ".repeat(depth);
        let suffix = if entry.file_type().is_dir() { "/" } else { "" };
        lines.push(format!("{}{}{}", prefix, name, suffix));
        count += 1;
    }
    lines
}
/// Formats a byte count for display using 1024-based units up to GB.
///
/// Values below 1024 are printed as an exact integer (`"512 B"`); larger values
/// are scaled and printed with one decimal (`"1.5 KB"`). GB is the largest unit,
/// so bigger values keep growing in GB.
pub fn human_bytes(bytes: u64) -> String {
    const UNITS: [&str; 4] = ["B", "KB", "MB", "GB"];

    if bytes < 1024 {
        return format!("{} {}", bytes, UNITS[0]);
    }

    let mut scaled = bytes as f64;
    let mut unit = UNITS[0];
    // Step up one unit at a time until the value fits or the units run out.
    for &next in &UNITS[1..] {
        if scaled < 1024.0 {
            break;
        }
        scaled /= 1024.0;
        unit = next;
    }
    format!("{:.1} {}", scaled, unit)
}
/// Resolves `query` to a path under `root`.
///
/// `query` is first tried as a path joined onto `root`; if nothing exists
/// there, the tree is walked and the first regular file whose name equals
/// `query` is returned. Entries that cannot be read are skipped. Returns `None`
/// when neither lookup succeeds.
pub fn find_path(root: &Path, query: &str) -> Option<PathBuf> {
    let candidate = root.join(query);
    if candidate.exists() {
        return Some(candidate);
    }

    WalkDir::new(root)
        .into_iter()
        .flatten()
        .filter(|found| found.file_type().is_file())
        .find(|found| found.file_name().to_string_lossy() == query)
        .map(|found| found.path().to_path_buf())
}