use std::collections::{HashMap, HashSet};
use std::ffi::OsStr;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex};
use rayon::Scope;
use crate::cli::Cli;
use crate::model::{DirData, LanguageCounts, ScanResult};
/// Reads `<root>/.gitignore` and returns its pattern lines.
///
/// Every line is trimmed of surrounding whitespace; blank lines and lines
/// whose trimmed form starts with `#` are dropped. The remaining lines are
/// returned in file order, otherwise unmodified.
///
/// If the file is missing or cannot be read as UTF-8 text, an empty vector
/// is returned rather than an error.
pub fn load_gitignore_patterns(root: &Path) -> Vec<String> {
    fs::read_to_string(root.join(".gitignore"))
        .map(|content| {
            content
                .lines()
                .map(str::trim)
                .filter(|line| !line.is_empty() && !line.starts_with('#'))
                .map(str::to_owned)
                .collect()
        })
        .unwrap_or_default()
}
/// Path component names that are always skipped during a scan.
///
/// `scan_tree` copies this list into `ScanConfig::default_skip`, and
/// `should_exclude_config` rejects any path that has a component exactly
/// equal to one of these names.
const DEFAULT_SKIP: [&str; 7] = [
".git",
"target",
"build",
"out",
"node_modules",
"third_party",
"dist",
];
/// Immutable settings for one scan, shared between scan tasks behind an `Arc`.
struct ScanConfig {
// Directory the scan starts from; entry paths are made relative to it
// before the exclusion checks run.
root: PathBuf,
// Extensions of files to count. `scan_dir` ASCII-lowercases a file's
// extension before looking it up in this set.
extensions: HashSet<String>,
// When true, entries are inspected with `fs::metadata` (which follows
// symlinks) and canonical directory paths are tracked in
// `ScanState::visited`; when false, symlinks are skipped.
follow_symlinks: bool,
// When false, any path with a component starting with '.' is excluded.
hidden: bool,
// Exclusion patterns (taken from `cli.exclude` in `scan_tree`),
// evaluated with `simple_match`.
exclude: Vec<String>,
// Patterns passed to `scan_tree` as `gitignore`, evaluated with
// `simple_match`.
gitignore: Vec<String>,
// Component names that exclude a path on exact match; filled from
// `DEFAULT_SKIP` in `scan_tree`.
default_skip: Vec<String>,
}
/// Mutable results accumulated by the scan tasks; shared behind `Arc<Mutex<_>>`.
struct ScanState {
// Per-directory data keyed by directory path (as discovered, not canonicalized).
dirs: HashMap<PathBuf, DirData>,
// Number of files whose extension matched `ScanConfig::extensions`.
total_files: usize,
// Directories other than the root that directly contain at least one matched file.
dirs_with_files: HashSet<PathBuf>,
// Matched-file count per name returned by
// `crate::lang::canonical_language_for_extension`.
language_counts: HashMap<String, usize>,
// Canonical paths of directories already scanned; only consulted when
// `follow_symlinks` is enabled.
visited: HashSet<PathBuf>,
// Message of the first `read_dir` failure, if any.
first_error: Option<String>,
}
/// Decides whether the path `rel` (relative to the scan root) must be skipped.
///
/// A path is excluded when any of the following holds:
/// 1. hidden entries are disabled (`config.hidden == false`) and some
///    component starts with `.`;
/// 2. some component is exactly equal to an entry of `config.default_skip`;
/// 3. the lossily-converted path string matches any `config.exclude` or
///    `config.gitignore` pattern according to `simple_match`.
///
/// Returns `true` when the path should be excluded.
fn should_exclude_config(rel: &Path, config: &ScanConfig) -> bool {
    let parts: Vec<_> = rel
        .components()
        .map(|component| component.as_os_str().to_string_lossy())
        .collect();

    let hidden_hit = !config.hidden && parts.iter().any(|part| part.starts_with('.'));
    if hidden_hit {
        return true;
    }

    let skip_hit = parts.iter().any(|part| {
        let name: &str = part;
        config.default_skip.iter().any(|skip| skip.as_str() == name)
    });
    if skip_hit {
        return true;
    }

    // User excludes are consulted before gitignore patterns; since the
    // result is a plain "any", the order only affects short-circuiting.
    let rel_text = rel.to_string_lossy();
    config
        .exclude
        .iter()
        .chain(config.gitignore.iter())
        .any(|pattern| simple_match(&rel_text, pattern))
}
/// Minimal glob-like matcher used for `--exclude` and `.gitignore` patterns.
///
/// This is intentionally not a full gitignore implementation. Supported forms:
///
/// - `*` matches every path.
/// - `dir/*` (optionally written `/dir/*`) matches `dir` itself and anything
///   below it. The match is anchored at the start of `path` and respects the
///   component boundary, so `build/*` does not match `builder/x.rs`.
///   A bare `/*` matches every path.
/// - `*.ext` matches paths ending in `.ext` (the dot is part of the suffix,
///   so `*.rs` does not match `src/parsers`).
/// - anything else is a substring test after stripping leading and trailing
///   `/` from the pattern. A pattern that is empty after stripping matches
///   nothing (previously it matched everything, because every string
///   contains the empty string).
///
/// `path` is expected to be relative to the scan root.
fn simple_match(path: &str, pattern: &str) -> bool {
    if pattern == "*" {
        return true;
    }

    if let Some(prefix) = pattern.strip_suffix("/*") {
        // Paths are root-relative, so a leading '/' only means "anchored".
        let prefix = prefix.trim_start_matches('/');
        if prefix.is_empty() {
            return true;
        }
        // Require a component boundary right after the prefix.
        return path
            .strip_prefix(prefix)
            .is_some_and(|rest| rest.is_empty() || rest.starts_with('/'));
    }

    if pattern.starts_with("*.") {
        // Drop only the '*' (one ASCII byte) so the '.' stays in the suffix.
        return path.ends_with(&pattern[1..]);
    }

    let needle = pattern.trim_matches('/');
    !needle.is_empty() && path.contains(needle)
}
/// Scans one directory and spawns a task on `scope` for every accepted subdirectory.
///
/// Each entry's path is made relative to `config.root` and filtered through
/// `should_exclude_config`. Accepted directories are registered in
/// `ScanState::dirs` and linked into their parent's `children`. Accepted
/// regular files whose ASCII-lowercased extension is in `config.extensions`
/// increment the total, per-directory and per-language counters.
///
/// # Parameters
/// - `scope`: rayon scope that subdirectory scans are spawned onto.
/// - `current`: directory to read.
/// - `config`: immutable scan settings shared by all tasks.
/// - `state`: accumulated results, guarded by a mutex.
/// - `has_error`: flag set when a `read_dir` call fails; a task that starts
///   after the flag is set returns without doing any work.
///
/// # Errors
/// Nothing is returned. A failure to read `current` is stored in
/// `ScanState::first_error` (only the first message is kept) and
/// `has_error` is raised. Unreadable individual entries and entries whose
/// metadata cannot be fetched are skipped silently.
///
/// # Panics
/// Panics if the state mutex is poisoned.
fn scan_dir<'scope>(
    scope: &Scope<'scope>,
    current: PathBuf,
    config: Arc<ScanConfig>,
    state: Arc<Mutex<ScanState>>,
    has_error: &'scope AtomicBool,
) {
    if has_error.load(Ordering::Relaxed) {
        return;
    }

    // With symlinks followed, the same directory can be reached through
    // several paths (or through a cycle); scan each canonical path once.
    if config.follow_symlinks {
        let canon = current.canonicalize().unwrap_or_else(|_| current.clone());
        let first_visit = state.lock().unwrap().visited.insert(canon);
        if !first_visit {
            return;
        }
    }

    let entries = match fs::read_dir(&current) {
        Ok(entries) => entries,
        Err(e) => {
            let msg = format!("failed to read {}: {e}", current.display());
            let mut st = state.lock().unwrap();
            if st.first_error.is_none() {
                st.first_error = Some(msg);
            }
            has_error.store(true, Ordering::Relaxed);
            return;
        }
    };

    let root = &config.root;
    let mut subdirs = Vec::new();
    for entry in entries {
        let Ok(entry) = entry else { continue };
        let path = entry.path();
        let rel = path.strip_prefix(root).unwrap_or(&path);
        if should_exclude_config(rel, &config) {
            continue;
        }

        // `metadata` resolves symlinks, `symlink_metadata` reports the link itself.
        let metadata = if config.follow_symlinks {
            fs::metadata(&path)
        } else {
            fs::symlink_metadata(&path)
        };
        let Ok(meta) = metadata else { continue };
        let ft = meta.file_type();
        if ft.is_symlink() && !config.follow_symlinks {
            continue;
        }

        if ft.is_dir() {
            let name = path
                .file_name()
                .unwrap_or_else(|| OsStr::new("."))
                .to_string_lossy()
                .into_owned();
            {
                // Keep the lock scoped to the bookkeeping only.
                let mut st = state.lock().unwrap();
                st.dirs.entry(path.clone()).or_insert_with(|| DirData {
                    name,
                    ..DirData::default()
                });
                if let Some(parent) = path.parent() {
                    st.dirs
                        .entry(parent.to_path_buf())
                        .or_default()
                        .children
                        .insert(path.clone());
                }
            }
            subdirs.push(path);
            continue;
        }

        if !ft.is_file() {
            continue;
        }

        let wanted_ext = path
            .extension()
            .and_then(|v| v.to_str())
            .map(|v| v.to_ascii_lowercase())
            .filter(|ext| config.extensions.contains(ext));
        let Some(ext) = wanted_ext else { continue };

        let parent = path
            .parent()
            .map(Path::to_path_buf)
            .unwrap_or_else(|| root.clone());
        let mut st = state.lock().unwrap();
        st.total_files += 1;
        st.dirs.entry(parent.clone()).or_default().direct_files += 1;
        // Files directly in the root are not counted as a "directory with files".
        if parent != *root {
            st.dirs_with_files.insert(parent);
        }
        if let Some(canonical) = crate::lang::canonical_language_for_extension(&ext) {
            *st.language_counts.entry(canonical).or_insert(0) += 1;
        }
    }

    // Recurse only after the directory has been fully read.
    for path_buf in subdirs {
        let config = Arc::clone(&config);
        let state = Arc::clone(&state);
        scope.spawn(move |s| scan_dir(s, path_buf, config, state, has_error));
    }
}
/// Scans the directory tree under `root` in parallel and collects the results.
///
/// # Parameters
/// - `root`: directory to start from; it is pre-registered in the result's
///   `dirs` map, named after its final path component (or `"."` if it has none).
/// - `extensions`: file extensions to count.
/// - `cli`: supplies `follow_symlinks`, `hidden` and `exclude`.
/// - `gitignore`: additional exclusion patterns.
///
/// # Errors
/// Returns the message of the first directory-read failure recorded by
/// `scan_dir`, if there was one.
///
/// # Panics
/// Panics if the shared state mutex is poisoned.
pub fn scan_tree(
    root: &Path,
    extensions: &HashSet<String>,
    cli: &Cli,
    gitignore: &[String],
) -> Result<ScanResult, String> {
    let root_name = root
        .file_name()
        .unwrap_or_else(|| OsStr::new("."))
        .to_string_lossy()
        .into_owned();
    let root_entry = DirData {
        name: root_name,
        ..DirData::default()
    };

    let shared = Arc::new(Mutex::new(ScanState {
        dirs: HashMap::from([(root.to_path_buf(), root_entry)]),
        total_files: 0,
        dirs_with_files: HashSet::new(),
        language_counts: HashMap::new(),
        visited: HashSet::new(),
        first_error: None,
    }));

    let settings = Arc::new(ScanConfig {
        root: root.to_path_buf(),
        extensions: extensions.clone(),
        follow_symlinks: cli.follow_symlinks,
        hidden: cli.hidden,
        exclude: cli.exclude.clone(),
        gitignore: gitignore.to_vec(),
        default_skip: DEFAULT_SKIP.iter().map(|name| (*name).to_owned()).collect(),
    });

    let failed = AtomicBool::new(false);
    // `rayon::scope` returns only after every spawned scan task has finished.
    rayon::scope(|scope| {
        scan_dir(
            scope,
            root.to_path_buf(),
            settings,
            Arc::clone(&shared),
            &failed,
        );
    });

    let mut finished = shared.lock().unwrap();
    match finished.first_error.take() {
        Some(message) => Err(message),
        None => Ok(ScanResult {
            root: root.to_path_buf(),
            dirs: std::mem::take(&mut finished.dirs),
            total_files: finished.total_files,
            dirs_with_files: finished.dirs_with_files.len(),
            language_counts: LanguageCounts(std::mem::take(&mut finished.language_counts)),
        }),
    }
}
/// Computes, for `root` and every directory reachable from it through
/// `children`, the number of files in that directory's whole subtree.
///
/// A directory's count is its own `direct_files` plus the counts of all its
/// children. A path that is listed as a child but has no entry in `dirs`
/// counts as `0` and still appears in the result. Directories in `dirs`
/// that are not reachable from `root` are not included.
///
/// Returns a map from directory path to subtree file count.
pub fn compute_tree_counts(
    root: &Path,
    dirs: &HashMap<PathBuf, DirData>,
) -> HashMap<PathBuf, usize> {
    /// Returns the subtree total for `path`, caching every result in `totals`.
    fn subtree_total(
        path: &Path,
        dirs: &HashMap<PathBuf, DirData>,
        totals: &mut HashMap<PathBuf, usize>,
    ) -> usize {
        if let Some(&cached) = totals.get(path) {
            return cached;
        }
        let total = match dirs.get(path) {
            Some(dir) => {
                let nested: usize = dir
                    .children
                    .iter()
                    .map(|child| subtree_total(child, dirs, totals))
                    .sum();
                dir.direct_files + nested
            }
            None => 0,
        };
        totals.insert(path.to_path_buf(), total);
        total
    }

    let mut totals = HashMap::new();
    subtree_total(root, dirs, &mut totals);
    totals
}