code_analyze_core/
traversal.rs1use ignore::WalkBuilder;
7use std::path::{Path, PathBuf};
8use std::sync::{Arc, Mutex};
9use std::time::Instant;
10use thiserror::Error;
11use tracing::instrument;
12
/// One filesystem entry collected by a directory walk.
#[derive(Clone, Debug)]
pub struct WalkEntry {
    /// Path of the entry as reported by the walker.
    pub path: PathBuf,
    /// Depth of the entry as reported by the walker.
    pub depth: usize,
    /// `true` when the walker reports a directory file type for the entry.
    pub is_dir: bool,
    /// `true` when the entry's path is itself a symbolic link.
    pub is_symlink: bool,
    /// Link target for symlink entries; `None` for non-symlinks or when the
    /// link could not be read.
    pub symlink_target: Option<PathBuf>,
}
22
23#[derive(Debug, Error)]
24#[non_exhaustive]
25pub enum TraversalError {
26 #[error("IO error: {0}")]
27 Io(#[from] std::io::Error),
28 #[error("internal concurrency error: {0}")]
29 Internal(String),
30}
31
32#[instrument(skip_all, fields(path = %root.display(), max_depth))]
36pub fn walk_directory(
37 root: &Path,
38 max_depth: Option<u32>,
39) -> Result<Vec<WalkEntry>, TraversalError> {
40 let start = Instant::now();
41 let mut builder = WalkBuilder::new(root);
42 builder.hidden(true).standard_filters(true);
43
44 if let Some(depth) = max_depth
46 && depth > 0
47 {
48 builder.max_depth(Some(depth as usize));
49 }
50
51 let entries = Arc::new(Mutex::new(Vec::new()));
52 let entries_clone = Arc::clone(&entries);
53
54 builder.build_parallel().run(move || {
55 let entries = Arc::clone(&entries_clone);
56 Box::new(move |result| match result {
57 Ok(entry) => {
58 let path = entry.path().to_path_buf();
59 let depth = entry.depth();
60 let is_dir = entry.file_type().is_some_and(|ft| ft.is_dir());
61 let is_symlink = entry.path_is_symlink();
62
63 let symlink_target = if is_symlink {
64 std::fs::read_link(&path).ok()
65 } else {
66 None
67 };
68
69 let walk_entry = WalkEntry {
70 path,
71 depth,
72 is_dir,
73 is_symlink,
74 symlink_target,
75 };
76 let Ok(mut guard) = entries.lock() else {
77 tracing::debug!("mutex poisoned in parallel walker, skipping entry");
78 return ignore::WalkState::Skip;
79 };
80 guard.push(walk_entry);
81 ignore::WalkState::Continue
82 }
83 Err(e) => {
84 tracing::warn!(error = %e, "skipping unreadable entry");
85 ignore::WalkState::Continue
86 }
87 })
88 });
89
90 let mut entries = Arc::try_unwrap(entries)
91 .map_err(|_| {
92 TraversalError::Internal("arc unwrap failed: strong references still live".to_string())
93 })?
94 .into_inner()
95 .map_err(|_| TraversalError::Internal("mutex poisoned".to_string()))?;
96
97 let dir_count = entries.iter().filter(|e| e.is_dir).count();
98 let file_count = entries.iter().filter(|e| !e.is_dir).count();
99
100 tracing::debug!(
101 entries = entries.len(),
102 dirs = dir_count,
103 files = file_count,
104 duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX),
105 "walk complete"
106 );
107
108 entries.sort_by(|a, b| a.path.cmp(&b.path));
110 Ok(entries)
111}
112
113#[must_use]
118pub fn subtree_counts_from_entries(root: &Path, entries: &[WalkEntry]) -> Vec<(PathBuf, usize)> {
119 let mut counts: Vec<(PathBuf, usize)> = Vec::new();
120 for entry in entries {
121 if entry.is_dir {
122 continue;
123 }
124 if entry.path.components().any(|c| {
126 let s = c.as_os_str().to_string_lossy();
127 crate::EXCLUDED_DIRS.contains(&s.as_ref())
128 }) {
129 continue;
130 }
131 let Ok(rel) = entry.path.strip_prefix(root) else {
132 continue;
133 };
134 if let Some(first) = rel.components().next() {
135 let key = root.join(first);
136 match counts.last_mut() {
137 Some(last) if last.0 == key => last.1 += 1,
138 _ => counts.push((key, 1)),
139 }
140 }
141 }
142 counts
143}