code_analyze_core/
traversal.rs1use ignore::WalkBuilder;
9use std::path::{Path, PathBuf};
10use std::sync::{Arc, Mutex};
11use std::time::Instant;
12use thiserror::Error;
13use tracing::instrument;
14
15#[derive(Debug, Clone)]
16pub struct WalkEntry {
17 pub path: PathBuf,
18 pub depth: usize,
20 pub is_dir: bool,
21 pub is_symlink: bool,
22 pub symlink_target: Option<PathBuf>,
23}
24
25#[derive(Debug, Error)]
26#[non_exhaustive]
27pub enum TraversalError {
28 #[error("IO error: {0}")]
29 Io(#[from] std::io::Error),
30 #[error("internal concurrency error: {0}")]
31 Internal(String),
32}
33
34#[instrument(skip_all, fields(path = %root.display(), max_depth))]
38pub fn walk_directory(
39 root: &Path,
40 max_depth: Option<u32>,
41) -> Result<Vec<WalkEntry>, TraversalError> {
42 let start = Instant::now();
43 let mut builder = WalkBuilder::new(root);
44 builder.hidden(true).standard_filters(true);
45
46 if let Some(depth) = max_depth
48 && depth > 0
49 {
50 builder.max_depth(Some(depth as usize));
51 }
52
53 let entries = Arc::new(Mutex::new(Vec::new()));
54 let entries_clone = Arc::clone(&entries);
55
56 builder.build_parallel().run(move || {
57 let entries = Arc::clone(&entries_clone);
58 Box::new(move |result| match result {
59 Ok(entry) => {
60 let path = entry.path().to_path_buf();
61 let depth = entry.depth();
62 let is_dir = entry.file_type().is_some_and(|ft| ft.is_dir());
63 let is_symlink = entry.path_is_symlink();
64
65 let symlink_target = if is_symlink {
66 std::fs::read_link(&path).ok()
67 } else {
68 None
69 };
70
71 let walk_entry = WalkEntry {
72 path,
73 depth,
74 is_dir,
75 is_symlink,
76 symlink_target,
77 };
78 let Ok(mut guard) = entries.lock() else {
79 tracing::debug!("mutex poisoned in parallel walker, skipping entry");
80 return ignore::WalkState::Skip;
81 };
82 guard.push(walk_entry);
83 ignore::WalkState::Continue
84 }
85 Err(e) => {
86 tracing::warn!(error = %e, "skipping unreadable entry");
87 ignore::WalkState::Continue
88 }
89 })
90 });
91
92 let mut entries = Arc::try_unwrap(entries)
93 .map_err(|_| {
94 TraversalError::Internal("arc unwrap failed: strong references still live".to_string())
95 })?
96 .into_inner()
97 .map_err(|_| TraversalError::Internal("mutex poisoned".to_string()))?;
98
99 let dir_count = entries.iter().filter(|e| e.is_dir).count();
100 let file_count = entries.iter().filter(|e| !e.is_dir).count();
101
102 tracing::debug!(
103 entries = entries.len(),
104 dirs = dir_count,
105 files = file_count,
106 duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX),
107 "walk complete"
108 );
109
110 entries.sort_by(|a, b| a.path.cmp(&b.path));
112 Ok(entries)
113}
114
115#[must_use]
120pub fn subtree_counts_from_entries(root: &Path, entries: &[WalkEntry]) -> Vec<(PathBuf, usize)> {
121 let mut counts: Vec<(PathBuf, usize)> = Vec::new();
122 for entry in entries {
123 if entry.is_dir {
124 continue;
125 }
126 if entry.path.components().any(|c| {
128 let s = c.as_os_str().to_string_lossy();
129 crate::EXCLUDED_DIRS.contains(&s.as_ref())
130 }) {
131 continue;
132 }
133 let Ok(rel) = entry.path.strip_prefix(root) else {
134 continue;
135 };
136 if let Some(first) = rel.components().next() {
137 let key = root.join(first);
138 match counts.last_mut() {
139 Some(last) if last.0 == key => last.1 += 1,
140 _ => counts.push((key, 1)),
141 }
142 }
143 }
144 counts
145}