Skip to main content

code_analyze_mcp/
traversal.rs

1//! Directory traversal with .gitignore support.
2//!
3//! Provides recursive directory walking with automatic filtering based on `.gitignore` and `.ignore` files.
4//! Uses the `ignore` crate for cross-platform, efficient file system traversal.
5
6use ignore::WalkBuilder;
7use std::path::{Path, PathBuf};
8use std::sync::{Arc, Mutex};
9use std::time::Instant;
10use thiserror::Error;
11use tracing::instrument;
12
/// A single file-system entry produced by [`walk_directory`].
///
/// Equality is field-wise, so two entries compare equal only when their path, depth,
/// kind flags and symlink target all match.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WalkEntry {
    /// Path of the entry as reported by the walker.
    pub path: PathBuf,
    /// Depth in the directory tree (0 = root).
    pub depth: usize,
    /// `true` when the walker reported a directory file type; `false` for every other
    /// kind and when the file type could not be determined.
    pub is_dir: bool,
    /// `true` when the entry's path is a symbolic link.
    pub is_symlink: bool,
    /// Link target read with `std::fs::read_link`; `None` for non-symlinks and for
    /// symlinks whose target could not be read.
    pub symlink_target: Option<PathBuf>,
}
22
23#[derive(Debug, Error)]
24pub enum TraversalError {
25    #[error("IO error: {0}")]
26    Io(#[from] std::io::Error),
27    #[error("internal concurrency error: {0}")]
28    Internal(String),
29}
30
31/// Walk a directory with support for .gitignore and .ignore.
32/// max_depth=0 maps to unlimited recursion (None), positive values limit depth.
33/// The returned entries are sorted lexicographically by path.
34#[instrument(skip_all, fields(path = %root.display(), max_depth))]
35pub fn walk_directory(
36    root: &Path,
37    max_depth: Option<u32>,
38) -> Result<Vec<WalkEntry>, TraversalError> {
39    let start = Instant::now();
40    let mut builder = WalkBuilder::new(root);
41    builder.hidden(true).standard_filters(true);
42
43    // Map max_depth: 0 = unlimited (None), positive = Some(n)
44    if let Some(depth) = max_depth
45        && depth > 0
46    {
47        builder.max_depth(Some(depth as usize));
48    }
49
50    let entries = Arc::new(Mutex::new(Vec::new()));
51    let entries_clone = Arc::clone(&entries);
52
53    builder.build_parallel().run(move || {
54        let entries = Arc::clone(&entries_clone);
55        Box::new(move |result| match result {
56            Ok(entry) => {
57                let path = entry.path().to_path_buf();
58                let depth = entry.depth();
59                let is_dir = entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false);
60                let is_symlink = entry.path_is_symlink();
61
62                let symlink_target = if is_symlink {
63                    std::fs::read_link(&path).ok()
64                } else {
65                    None
66                };
67
68                let walk_entry = WalkEntry {
69                    path,
70                    depth,
71                    is_dir,
72                    is_symlink,
73                    symlink_target,
74                };
75                entries.lock().unwrap().push(walk_entry);
76                ignore::WalkState::Continue
77            }
78            Err(e) => {
79                tracing::warn!(error = %e, "skipping unreadable entry");
80                ignore::WalkState::Continue
81            }
82        })
83    });
84
85    let mut entries = Arc::try_unwrap(entries)
86        .map_err(|_| {
87            TraversalError::Internal("arc unwrap failed: strong references still live".to_string())
88        })?
89        .into_inner()
90        .map_err(|_| TraversalError::Internal("mutex poisoned".to_string()))?;
91
92    let dir_count = entries.iter().filter(|e| e.is_dir).count();
93    let file_count = entries.iter().filter(|e| !e.is_dir).count();
94
95    tracing::debug!(
96        entries = entries.len(),
97        dirs = dir_count,
98        files = file_count,
99        duration_ms = start.elapsed().as_millis() as u64,
100        "walk complete"
101    );
102
103    // Restore sort contract: walk_parallel does not guarantee order.
104    entries.sort_by(|a, b| a.path.cmp(&b.path));
105    Ok(entries)
106}
107
108/// Compute files-per-depth-1-subdirectory counts from an already-collected entry list.
109/// Returns a Vec of (depth-1 path, file count) sorted by path.
110/// Only counts file entries (not directories); skips entries containing EXCLUDED_DIRS components.
111/// Output Vec is sorted by construction (entries are pre-sorted by path).
112pub fn subtree_counts_from_entries(root: &Path, entries: &[WalkEntry]) -> Vec<(PathBuf, usize)> {
113    let mut counts: Vec<(PathBuf, usize)> = Vec::new();
114    for entry in entries {
115        if entry.is_dir {
116            continue;
117        }
118        // Skip entries whose path components contain EXCLUDED_DIRS
119        if entry.path.components().any(|c| {
120            let s = c.as_os_str().to_string_lossy();
121            crate::EXCLUDED_DIRS.contains(&s.as_ref())
122        }) {
123            continue;
124        }
125        let rel = match entry.path.strip_prefix(root) {
126            Ok(r) => r,
127            Err(_) => continue,
128        };
129        if let Some(first) = rel.components().next() {
130            let key = root.join(first);
131            match counts.last_mut() {
132                Some(last) if last.0 == key => last.1 += 1,
133                _ => counts.push((key, 1)),
134            }
135        }
136    }
137    counts
138}