Skip to main content

code_analyze_core/
traversal.rs

1//! Directory traversal with .gitignore support.
2//!
3//! Provides recursive directory walking with automatic filtering based on `.gitignore` and `.ignore` files.
4//! Uses the `ignore` crate for cross-platform, efficient file system traversal.
5
6use ignore::WalkBuilder;
7use std::path::{Path, PathBuf};
8use std::sync::{Arc, Mutex};
9use std::time::Instant;
10use thiserror::Error;
11use tracing::instrument;
12
/// A single filesystem entry collected during a directory walk.
#[derive(Clone, Debug)]
pub struct WalkEntry {
    /// Path of the entry as reported by the walker.
    pub path: PathBuf,
    /// Depth in the directory tree (0 = root).
    pub depth: usize,
    /// `true` when the entry's file type is a directory.
    pub is_dir: bool,
    /// `true` when the entry's path is a symbolic link.
    pub is_symlink: bool,
    /// Link target for symlinks when it could be read; `None` otherwise.
    pub symlink_target: Option<PathBuf>,
}
22
23#[derive(Debug, Error)]
24#[non_exhaustive]
25pub enum TraversalError {
26    #[error("IO error: {0}")]
27    Io(#[from] std::io::Error),
28    #[error("internal concurrency error: {0}")]
29    Internal(String),
30}
31
32/// Walk a directory with support for `.gitignore` and `.ignore`.
33/// `max_depth=0` maps to unlimited recursion (None), positive values limit depth.
34/// The returned entries are sorted lexicographically by path.
35#[instrument(skip_all, fields(path = %root.display(), max_depth))]
36pub fn walk_directory(
37    root: &Path,
38    max_depth: Option<u32>,
39) -> Result<Vec<WalkEntry>, TraversalError> {
40    let start = Instant::now();
41    let mut builder = WalkBuilder::new(root);
42    builder.hidden(true).standard_filters(true);
43
44    // Map max_depth: 0 = unlimited (None), positive = Some(n)
45    if let Some(depth) = max_depth
46        && depth > 0
47    {
48        builder.max_depth(Some(depth as usize));
49    }
50
51    let entries = Arc::new(Mutex::new(Vec::new()));
52    let entries_clone = Arc::clone(&entries);
53
54    builder.build_parallel().run(move || {
55        let entries = Arc::clone(&entries_clone);
56        Box::new(move |result| match result {
57            Ok(entry) => {
58                let path = entry.path().to_path_buf();
59                let depth = entry.depth();
60                let is_dir = entry.file_type().is_some_and(|ft| ft.is_dir());
61                let is_symlink = entry.path_is_symlink();
62
63                let symlink_target = if is_symlink {
64                    std::fs::read_link(&path).ok()
65                } else {
66                    None
67                };
68
69                let walk_entry = WalkEntry {
70                    path,
71                    depth,
72                    is_dir,
73                    is_symlink,
74                    symlink_target,
75                };
76                let Ok(mut guard) = entries.lock() else {
77                    tracing::debug!("mutex poisoned in parallel walker, skipping entry");
78                    return ignore::WalkState::Skip;
79                };
80                guard.push(walk_entry);
81                ignore::WalkState::Continue
82            }
83            Err(e) => {
84                tracing::warn!(error = %e, "skipping unreadable entry");
85                ignore::WalkState::Continue
86            }
87        })
88    });
89
90    let mut entries = Arc::try_unwrap(entries)
91        .map_err(|_| {
92            TraversalError::Internal("arc unwrap failed: strong references still live".to_string())
93        })?
94        .into_inner()
95        .map_err(|_| TraversalError::Internal("mutex poisoned".to_string()))?;
96
97    let dir_count = entries.iter().filter(|e| e.is_dir).count();
98    let file_count = entries.iter().filter(|e| !e.is_dir).count();
99
100    tracing::debug!(
101        entries = entries.len(),
102        dirs = dir_count,
103        files = file_count,
104        duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX),
105        "walk complete"
106    );
107
108    // Restore sort contract: walk_parallel does not guarantee order.
109    entries.sort_by(|a, b| a.path.cmp(&b.path));
110    Ok(entries)
111}
112
113/// Compute files-per-depth-1-subdirectory counts from an already-collected entry list.
114/// Returns a Vec of (depth-1 path, file count) sorted by path.
115/// Only counts file entries (not directories); skips entries containing `EXCLUDED_DIRS` components.
116/// Output Vec is sorted by construction (entries are pre-sorted by path).
117#[must_use]
118pub fn subtree_counts_from_entries(root: &Path, entries: &[WalkEntry]) -> Vec<(PathBuf, usize)> {
119    let mut counts: Vec<(PathBuf, usize)> = Vec::new();
120    for entry in entries {
121        if entry.is_dir {
122            continue;
123        }
124        // Skip entries whose path components contain EXCLUDED_DIRS
125        if entry.path.components().any(|c| {
126            let s = c.as_os_str().to_string_lossy();
127            crate::EXCLUDED_DIRS.contains(&s.as_ref())
128        }) {
129            continue;
130        }
131        let Ok(rel) = entry.path.strip_prefix(root) else {
132            continue;
133        };
134        if let Some(first) = rel.components().next() {
135            let key = root.join(first);
136            match counts.last_mut() {
137                Some(last) if last.0 == key => last.1 += 1,
138                _ => counts.push((key, 1)),
139            }
140        }
141    }
142    counts
143}