Skip to main content

code_analyze_core/
traversal.rs

1// SPDX-FileCopyrightText: 2026 code-analyze-mcp contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Directory traversal with .gitignore support.
4//!
5//! Provides recursive directory walking with automatic filtering based on `.gitignore` and `.ignore` files.
6//! Uses the `ignore` crate for cross-platform, efficient file system traversal.
7
8use ignore::WalkBuilder;
9use std::path::{Path, PathBuf};
10use std::sync::{Arc, Mutex};
11use std::time::Instant;
12use thiserror::Error;
13use tracing::instrument;
14
15#[derive(Debug, Clone)]
16pub struct WalkEntry {
17    pub path: PathBuf,
18    /// Depth in the directory tree (0 = root).
19    pub depth: usize,
20    pub is_dir: bool,
21    pub is_symlink: bool,
22    pub symlink_target: Option<PathBuf>,
23}
24
25#[derive(Debug, Error)]
26#[non_exhaustive]
27pub enum TraversalError {
28    #[error("IO error: {0}")]
29    Io(#[from] std::io::Error),
30    #[error("internal concurrency error: {0}")]
31    Internal(String),
32}
33
34/// Walk a directory with support for `.gitignore` and `.ignore`.
35/// `max_depth=0` maps to unlimited recursion (None), positive values limit depth.
36/// The returned entries are sorted lexicographically by path.
37#[instrument(skip_all, fields(path = %root.display(), max_depth))]
38pub fn walk_directory(
39    root: &Path,
40    max_depth: Option<u32>,
41) -> Result<Vec<WalkEntry>, TraversalError> {
42    let start = Instant::now();
43    let mut builder = WalkBuilder::new(root);
44    builder.hidden(true).standard_filters(true);
45
46    // Map max_depth: 0 = unlimited (None), positive = Some(n)
47    if let Some(depth) = max_depth
48        && depth > 0
49    {
50        builder.max_depth(Some(depth as usize));
51    }
52
53    let entries = Arc::new(Mutex::new(Vec::new()));
54    let entries_clone = Arc::clone(&entries);
55
56    builder.build_parallel().run(move || {
57        let entries = Arc::clone(&entries_clone);
58        Box::new(move |result| match result {
59            Ok(entry) => {
60                let path = entry.path().to_path_buf();
61                let depth = entry.depth();
62                let is_dir = entry.file_type().is_some_and(|ft| ft.is_dir());
63                let is_symlink = entry.path_is_symlink();
64
65                let symlink_target = if is_symlink {
66                    std::fs::read_link(&path).ok()
67                } else {
68                    None
69                };
70
71                let walk_entry = WalkEntry {
72                    path,
73                    depth,
74                    is_dir,
75                    is_symlink,
76                    symlink_target,
77                };
78                let Ok(mut guard) = entries.lock() else {
79                    tracing::debug!("mutex poisoned in parallel walker, skipping entry");
80                    return ignore::WalkState::Skip;
81                };
82                guard.push(walk_entry);
83                ignore::WalkState::Continue
84            }
85            Err(e) => {
86                tracing::warn!(error = %e, "skipping unreadable entry");
87                ignore::WalkState::Continue
88            }
89        })
90    });
91
92    let mut entries = Arc::try_unwrap(entries)
93        .map_err(|_| {
94            TraversalError::Internal("arc unwrap failed: strong references still live".to_string())
95        })?
96        .into_inner()
97        .map_err(|_| TraversalError::Internal("mutex poisoned".to_string()))?;
98
99    let dir_count = entries.iter().filter(|e| e.is_dir).count();
100    let file_count = entries.iter().filter(|e| !e.is_dir).count();
101
102    tracing::debug!(
103        entries = entries.len(),
104        dirs = dir_count,
105        files = file_count,
106        duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX),
107        "walk complete"
108    );
109
110    // Restore sort contract: walk_parallel does not guarantee order.
111    entries.sort_by(|a, b| a.path.cmp(&b.path));
112    Ok(entries)
113}
114
115/// Compute files-per-depth-1-subdirectory counts from an already-collected entry list.
116/// Returns a Vec of (depth-1 path, file count) sorted by path.
117/// Only counts file entries (not directories); skips entries containing `EXCLUDED_DIRS` components.
118/// Output Vec is sorted by construction (entries are pre-sorted by path).
119#[must_use]
120pub fn subtree_counts_from_entries(root: &Path, entries: &[WalkEntry]) -> Vec<(PathBuf, usize)> {
121    let mut counts: Vec<(PathBuf, usize)> = Vec::new();
122    for entry in entries {
123        if entry.is_dir {
124            continue;
125        }
126        // Skip entries whose path components contain EXCLUDED_DIRS
127        if entry.path.components().any(|c| {
128            let s = c.as_os_str().to_string_lossy();
129            crate::EXCLUDED_DIRS.contains(&s.as_ref())
130        }) {
131            continue;
132        }
133        let Ok(rel) = entry.path.strip_prefix(root) else {
134            continue;
135        };
136        if let Some(first) = rel.components().next() {
137            let key = root.join(first);
138            match counts.last_mut() {
139                Some(last) if last.0 == key => last.1 += 1,
140                _ => counts.push((key, 1)),
141            }
142        }
143    }
144    counts
145}