treestat 1.1.0

A CLI that displays source file counts in a tree view by directory and language
Documentation
use std::collections::{HashMap, HashSet};
use std::ffi::OsStr;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex};

use rayon::Scope;
use crate::cli::Cli;
use crate::model::{DirData, LanguageCounts, ScanResult};

/// Reads `<root>/.gitignore` and returns its usable pattern lines.
///
/// Each line is trimmed of surrounding whitespace; blank lines and lines whose
/// trimmed form starts with `#` are dropped. Patterns are returned in file
/// order. If the file cannot be read (missing, unreadable, not UTF-8) the
/// result is an empty vector rather than an error.
pub fn load_gitignore_patterns(root: &Path) -> Vec<String> {
    match fs::read_to_string(root.join(".gitignore")) {
        Ok(text) => text
            .lines()
            .map(str::trim)
            .filter(|entry| !entry.is_empty() && !entry.starts_with('#'))
            .map(String::from)
            .collect(),
        // Best effort: an absent or unreadable .gitignore simply means "no patterns".
        Err(_) => Vec::new(),
    }
}

/// Names that are always skipped during a scan.
///
/// `scan_tree` copies these into `ScanConfig::default_skip`; a path is then
/// excluded when any single component of its root-relative path equals one of
/// these names exactly (no globbing, case-sensitive).
const DEFAULT_SKIP: [&str; 7] = [
    ".git",
    "target",
    "build",
    "out",
    "node_modules",
    "third_party",
    "dist",
];

/// Immutable config shared across all parallel scan tasks.
///
/// Built once by `scan_tree` and handed to every `scan_dir` task behind an `Arc`.
struct ScanConfig {
    /// Directory the scan starts from; entry paths are made relative to it
    /// before the exclusion rules are applied.
    root: PathBuf,
    /// File extensions to count. Compared against the ASCII-lowercased
    /// extension of each file (no leading dot), so entries need that form to match.
    extensions: HashSet<String>,
    /// When true, entries are inspected with `fs::metadata` (symlinks are
    /// followed) and directories are deduplicated by canonical path; when
    /// false, symlinks are skipped.
    follow_symlinks: bool,
    /// When false, any path with a component starting with `.` is excluded.
    hidden: bool,
    /// User-supplied exclusion patterns, evaluated with `simple_match`
    /// against the root-relative path.
    exclude: Vec<String>,
    /// Patterns loaded from `.gitignore`, evaluated the same way as `exclude`.
    gitignore: Vec<String>,
    /// Names that exclude a path when any of its components equals one exactly.
    default_skip: Vec<String>,
}

/// Shared mutable state. All updates go through a single Mutex; directory reads and
/// metadata queries are done outside the lock to minimize contention.
///
/// NOTE(review): the previous version of this comment stated that `DirData::children`
/// is a BTreeSet so output order is stable regardless of discovery order. `DirData`
/// lives in `crate::model` and is not visible here — confirm there.
struct ScanState {
    /// Per-directory data keyed by directory path; seeded with the root by
    /// `scan_tree` and filled in as directories and counted files are discovered.
    dirs: HashMap<PathBuf, DirData>,
    /// Number of files whose extension is in `ScanConfig::extensions`.
    total_files: usize,
    /// Directories other than the root that directly contain at least one
    /// counted file; only the size of this set is reported.
    dirs_with_files: HashSet<PathBuf>,
    /// Counted files per canonical language name.
    language_counts: HashMap<String, usize>,
    /// Canonical paths of directories already scanned. Only used when
    /// `follow_symlinks` is true, to avoid scanning the same directory twice.
    visited: HashSet<PathBuf>,
    /// Message for the first fatal `read_dir` failure; `scan_tree` returns it as `Err`.
    first_error: Option<String>,
}

/// Decides whether the root-relative path `rel` must be left out of the scan.
///
/// A path is excluded when any of the following holds:
/// 1. hidden entries are disabled and some component starts with `.`;
/// 2. some component equals one of `config.default_skip` exactly;
/// 3. the lossy string form of the whole path matches a user `exclude`
///    pattern or a `gitignore` pattern according to `simple_match`.
fn should_exclude_config(rel: &Path, config: &ScanConfig) -> bool {
    let parts: Vec<_> = rel
        .components()
        .map(|part| part.as_os_str().to_string_lossy())
        .collect();

    let hidden_hit = !config.hidden && parts.iter().any(|name| name.starts_with('.'));
    if hidden_hit {
        return true;
    }

    let skip_hit = parts.iter().any(|name| {
        config
            .default_skip
            .iter()
            .any(|skipped| skipped.as_str() == name.as_ref())
    });
    if skip_hit {
        return true;
    }

    // User patterns and .gitignore patterns share the same matcher, so they
    // can be tested as one sequence (user patterns first, as before).
    let whole = rel.to_string_lossy();
    config
        .exclude
        .iter()
        .chain(config.gitignore.iter())
        .any(|pattern| simple_match(&whole, pattern))
}

/// Minimal pattern matcher used for `--exclude` and `.gitignore` entries.
///
/// This is intentionally not a full glob/gitignore implementation. Supported forms:
///
/// * `*` — matches every path.
/// * `dir/*` — matches `dir` itself and anything below it. The prefix must end
///   at a path boundary, so `build/*` does not match `builder/x`.
/// * `*.ext` — matches paths ending in `.ext`. The dot is part of the suffix,
///   so `*.rs` does not match `src/users`.
/// * anything else — leading/trailing `/` are trimmed and the remainder is
///   searched for as a plain substring of `path`. A pattern that is empty after
///   trimming (e.g. `""` or `/`) matches nothing instead of matching everything.
///
/// `path` is expected to be a root-relative path using `/` separators.
fn simple_match(path: &str, pattern: &str) -> bool {
    if pattern == "*" {
        return true;
    }

    if let Some(dir) = pattern.strip_suffix("/*") {
        // "/*" has an empty directory part: everything under the root.
        if dir.is_empty() {
            return true;
        }
        // Require the prefix to end exactly at a component boundary.
        return path
            .strip_prefix(dir)
            .is_some_and(|rest| rest.is_empty() || rest.starts_with('/'));
    }

    // Keep the '.' in the suffix so only real extensions match.
    if let Some(dot_ext) = pattern
        .strip_prefix('*')
        .filter(|rest| rest.starts_with('.'))
    {
        return path.ends_with(dot_ext);
    }

    let needle = pattern.trim_matches('/');
    // `contains("")` is always true; an empty needle must not exclude the whole tree.
    !needle.is_empty() && path.contains(needle)
}

/// Scans one directory and schedules every accepted subdirectory as a new task
/// on `scope`.
///
/// * `scope` — rayon scope the recursive tasks are spawned on.
/// * `current` — directory to read.
/// * `config` — immutable scan settings shared by all tasks.
/// * `state` — shared accumulator; locked only for short bookkeeping sections.
/// * `has_error` — set once any task fails to read a directory; tasks that start
///   afterwards return immediately.
///
/// Behavior:
/// * With `follow_symlinks`, the canonical path of `current` is recorded in
///   `state.visited`; a directory that was already recorded is not scanned again.
/// * A `read_dir` failure on `current` stores its message in `state.first_error`
///   (first failure wins), raises `has_error`, and ends this task.
/// * Entries that cannot be read, cannot be stat'ed, or are excluded by
///   `should_exclude_config` are skipped silently.
/// * Directories are registered in `state.dirs` (and as a child of their parent)
///   and queued for scanning; regular files whose lowercased extension is in
///   `config.extensions` are counted.
///
/// Panics if the state mutex is poisoned.
fn scan_dir<'scope>(
    scope: &Scope<'scope>,
    current: PathBuf,
    config: Arc<ScanConfig>,
    state: Arc<Mutex<ScanState>>,
    has_error: &'scope AtomicBool,
) {
    // Another task already hit a fatal error; don't start more work.
    if has_error.load(Ordering::Relaxed) {
        return;
    }

    if config.follow_symlinks {
        // Fall back to the raw path when canonicalization fails so the
        // directory is still scanned (and deduplicated by that path).
        let canon = current.canonicalize().unwrap_or_else(|_| current.clone());
        let first_visit = state.lock().unwrap().visited.insert(canon);
        if !first_visit {
            return;
        }
    }

    let entries = match fs::read_dir(&current) {
        Ok(entries) => entries,
        Err(e) => {
            let msg = format!("failed to read {}: {e}", current.display());
            let mut st = state.lock().unwrap();
            if st.first_error.is_none() {
                st.first_error = Some(msg);
            }
            has_error.store(true, Ordering::Relaxed);
            return;
        }
    };

    // Borrow the root instead of cloning it for every directory.
    let root = config.root.as_path();

    let mut subdirs = Vec::new();

    for entry in entries {
        // Best effort: an unreadable entry is skipped, not treated as fatal.
        let Ok(entry) = entry else { continue };
        let path = entry.path();
        let rel = path.strip_prefix(root).unwrap_or(&path);
        if should_exclude_config(rel, &config) {
            continue;
        }

        let metadata = if config.follow_symlinks {
            fs::metadata(&path)
        } else {
            fs::symlink_metadata(&path)
        };
        let Ok(meta) = metadata else { continue };
        let ft = meta.file_type();

        if ft.is_symlink() && !config.follow_symlinks {
            continue;
        }

        if ft.is_dir() {
            let name = path
                .file_name()
                .unwrap_or_else(|| OsStr::new("."))
                .to_string_lossy()
                .into_owned();
            {
                let mut st = state.lock().unwrap();
                st.dirs.entry(path.clone()).or_insert_with(|| DirData {
                    name,
                    ..DirData::default()
                });
                if let Some(parent) = path.parent() {
                    st.dirs
                        .entry(parent.to_path_buf())
                        .or_default()
                        .children
                        .insert(path.clone());
                }
            }
            // Spawn after the loop so the directory iterator is drained first.
            subdirs.push(path);
            continue;
        }

        if !ft.is_file() {
            continue;
        }

        let ext = path
            .extension()
            .and_then(|v| v.to_str())
            .map(|v| v.to_ascii_lowercase())
            .filter(|v| config.extensions.contains(v));
        let Some(ext) = ext else { continue };

        let parent = path
            .parent()
            .map(Path::to_path_buf)
            .unwrap_or_else(|| root.to_path_buf());
        // Resolve the language before taking the lock to keep the critical
        // section limited to bookkeeping.
        // NOTE(review): assumes canonical_language_for_extension is a pure lookup — confirm.
        let language = crate::lang::canonical_language_for_extension(&ext);

        let mut st = state.lock().unwrap();
        st.total_files += 1;
        st.dirs.entry(parent.clone()).or_default().direct_files += 1;
        // The root itself is never counted as a "directory with files".
        if parent.as_path() != root {
            st.dirs_with_files.insert(parent);
        }
        if let Some(language) = language {
            *st.language_counts.entry(language).or_insert(0) += 1;
        }
    }

    for path_buf in subdirs {
        let config = Arc::clone(&config);
        let state = Arc::clone(&state);
        scope.spawn(move |s| scan_dir(s, path_buf, config, state, has_error));
    }
}

/// Scans the tree under `root` in parallel and returns the aggregated counts.
///
/// * `root` — directory to start from; it is pre-registered in the directory map.
/// * `extensions` — file extensions that should be counted.
/// * `cli` — supplies `follow_symlinks`, `hidden` and the `exclude` patterns.
/// * `gitignore` — additional exclusion patterns (typically from `.gitignore`).
///
/// Returns `Err` with the message of the first directory that could not be
/// read; otherwise a `ScanResult` holding the per-directory data, the total
/// number of counted files, the number of non-root directories that directly
/// contain counted files, and the per-language counts.
///
/// Panics if the shared state mutex is poisoned.
pub fn scan_tree(
    root: &Path,
    extensions: &HashSet<String>,
    cli: &Cli,
    gitignore: &[String],
) -> Result<ScanResult, String> {
    // Seed the directory map with the root so it exists even when empty.
    let root_name = root
        .file_name()
        .unwrap_or_else(|| OsStr::new("."))
        .to_string_lossy()
        .to_string();
    let mut dirs = HashMap::new();
    dirs.insert(
        root.to_path_buf(),
        DirData {
            name: root_name,
            ..DirData::default()
        },
    );

    let shared = Arc::new(Mutex::new(ScanState {
        dirs,
        total_files: 0,
        dirs_with_files: HashSet::new(),
        language_counts: HashMap::new(),
        visited: HashSet::new(),
        first_error: None,
    }));

    let config = Arc::new(ScanConfig {
        root: root.to_path_buf(),
        extensions: extensions.clone(),
        follow_symlinks: cli.follow_symlinks,
        hidden: cli.hidden,
        exclude: cli.exclude.clone(),
        gitignore: gitignore.to_vec(),
        default_skip: DEFAULT_SKIP
            .iter()
            .map(|name| name.to_string())
            .collect::<Vec<_>>(),
    });

    let failed = AtomicBool::new(false);

    // The scope returns only after every spawned directory task has finished.
    rayon::scope(|scope| {
        scan_dir(
            scope,
            root.to_path_buf(),
            config,
            Arc::clone(&shared),
            &failed,
        );
    });

    let mut totals = shared.lock().unwrap();
    let failure = totals.first_error.take();
    match failure {
        Some(message) => Err(message),
        None => Ok(ScanResult {
            root: root.to_path_buf(),
            dirs: std::mem::take(&mut totals.dirs),
            total_files: totals.total_files,
            dirs_with_files: totals.dirs_with_files.len(),
            language_counts: LanguageCounts(std::mem::take(&mut totals.language_counts)),
        }),
    }
}

/// Computes, for every directory reachable from `root`, the number of counted
/// files in that directory and all of its descendants.
///
/// `dirs` maps each directory to its `direct_files` count and its `children`.
/// The returned map has one entry per visited path, including `root`; a path
/// that is missing from `dirs` contributes (and is recorded with) zero.
pub fn compute_tree_counts(
    root: &Path,
    dirs: &HashMap<PathBuf, DirData>,
) -> HashMap<PathBuf, usize> {
    /// Returns the subtree total for `dir`, caching every result in `totals`.
    fn subtree_total(
        dir: &Path,
        dirs: &HashMap<PathBuf, DirData>,
        totals: &mut HashMap<PathBuf, usize>,
    ) -> usize {
        if let Some(&known) = totals.get(dir) {
            return known;
        }

        let total = match dirs.get(dir) {
            Some(data) => {
                let mut running = data.direct_files;
                for child in &data.children {
                    running += subtree_total(child, dirs, totals);
                }
                running
            }
            None => 0,
        };

        totals.insert(dir.to_path_buf(), total);
        total
    }

    let mut totals = HashMap::new();
    subtree_total(root, dirs, &mut totals);
    totals
}