Skip to main content

cc_audit/engine/scanners/
walker.rs

1//! Directory walking abstraction for consistent file discovery.
2
3use crate::ignore::IgnoreFilter;
4use std::path::{Path, PathBuf};
5use walkdir::WalkDir;
6
/// Settings that control which files a directory walk yields.
///
/// The derived `Default` has no root patterns, no extension filter,
/// unlimited depth, and does not follow symbolic links.
#[derive(Debug, Clone, Default)]
pub struct WalkConfig {
    /// Root patterns to search, joined onto the base directory of a walk
    /// (e.g., [".claude/commands", "commands"]).
    pub root_patterns: Vec<PathBuf>,
    /// File extensions to include (e.g., ["md", "yaml", "json"]).
    /// An empty list disables extension filtering.
    pub file_extensions: Vec<&'static str>,
    /// Maximum depth to traverse. `None` means unlimited.
    pub max_depth: Option<usize>,
    /// Whether to follow symbolic links.
    pub follow_symlinks: bool,
}

impl WalkConfig {
    /// Build a configuration that searches `patterns`; every other setting
    /// keeps its default value.
    pub fn new(patterns: impl IntoIterator<Item = impl Into<PathBuf>>) -> Self {
        let root_patterns: Vec<PathBuf> = patterns.into_iter().map(|p| p.into()).collect();
        Self {
            root_patterns,
            ..Self::default()
        }
    }

    /// Replace the list of file extensions to include.
    pub fn with_extensions(self, extensions: &[&'static str]) -> Self {
        Self {
            file_extensions: Vec::from(extensions),
            ..self
        }
    }

    /// Limit traversal to at most `depth` levels.
    pub fn with_max_depth(self, depth: usize) -> Self {
        Self {
            max_depth: Some(depth),
            ..self
        }
    }

    /// Choose whether symbolic links are followed.
    pub fn with_follow_symlinks(self, follow: bool) -> Self {
        Self {
            follow_symlinks: follow,
            ..self
        }
    }
}
47
48/// Directory walker with optional ignore filter.
49pub struct DirectoryWalker {
50    config: WalkConfig,
51    ignore_filter: Option<IgnoreFilter>,
52}
53
54impl DirectoryWalker {
55    /// Create a new DirectoryWalker with the given configuration.
56    pub fn new(config: WalkConfig) -> Self {
57        Self {
58            config,
59            ignore_filter: None,
60        }
61    }
62
63    /// Set an ignore filter.
64    pub fn with_ignore_filter(mut self, filter: IgnoreFilter) -> Self {
65        self.ignore_filter = Some(filter);
66        self
67    }
68
69    /// Check if a path should be ignored.
70    fn is_ignored(&self, path: &Path) -> bool {
71        self.ignore_filter
72            .as_ref()
73            .is_some_and(|f| f.is_ignored(path))
74    }
75
76    /// Check if a path matches the configured extensions.
77    fn matches_extension(&self, path: &Path) -> bool {
78        if self.config.file_extensions.is_empty() {
79            return true;
80        }
81
82        // Compare case-insensitively so `.MD`/`.SH` on a case-sensitive
83        // filesystem aren't silently skipped (matches SkillFileFilter). See #228.
84        path.extension()
85            .and_then(|ext| ext.to_str())
86            .is_some_and(|ext| {
87                self.config
88                    .file_extensions
89                    .contains(&ext.to_lowercase().as_str())
90            })
91    }
92
93    /// Walk the directory and yield matching file paths.
94    pub fn walk<'a>(&'a self, base_dir: &'a Path) -> impl Iterator<Item = PathBuf> + 'a {
95        self.config.root_patterns.iter().flat_map(move |pattern| {
96            let target = base_dir.join(pattern);
97            if !target.exists() {
98                return Vec::new();
99            }
100
101            let mut walker = WalkDir::new(&target).follow_links(self.config.follow_symlinks);
102
103            if let Some(depth) = self.config.max_depth {
104                walker = walker.max_depth(depth);
105            }
106
107            walker
108                .into_iter()
109                .filter_map(|entry| match entry {
110                    Ok(e) => Some(e),
111                    Err(e) => {
112                        tracing::warn!(error = %e, "ディレクトリエントリの読み取りに失敗。スキップします");
113                        None
114                    }
115                })
116                .filter(|e| e.file_type().is_file())
117                .filter(|e| self.matches_extension(e.path()))
118                .filter(|e| !self.is_ignored(e.path()))
119                .map(|e| e.path().to_path_buf())
120                .collect::<Vec<_>>()
121        })
122    }
123
124    /// Walk a single directory (not using patterns).
125    pub fn walk_single(&self, dir: &Path) -> impl Iterator<Item = PathBuf> + '_ {
126        let mut walker = WalkDir::new(dir).follow_links(self.config.follow_symlinks);
127
128        if let Some(depth) = self.config.max_depth {
129            walker = walker.max_depth(depth);
130        }
131
132        walker
133            .into_iter()
134            .filter_map(|entry| match entry {
135                Ok(e) => Some(e),
136                Err(e) => {
137                    tracing::warn!(error = %e, "ディレクトリエントリの読み取りに失敗。スキップします");
138                    None
139                }
140            })
141            .filter(|e| e.file_type().is_file())
142            .filter(|e| self.matches_extension(e.path()))
143            .filter(|e| !self.is_ignored(e.path()))
144            .map(|e| e.path().to_path_buf())
145            .collect::<Vec<_>>()
146            .into_iter()
147    }
148}
149
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Build a temporary tree containing `.claude/commands/{test.md,other.txt}`
    /// and `scripts/script.sh`.
    fn create_test_dir() -> TempDir {
        let root = TempDir::new().unwrap();

        // Command files: one markdown file and one non-markdown file.
        let commands_dir = root.path().join(".claude").join("commands");
        fs::create_dir_all(&commands_dir).unwrap();
        for (name, body) in [("test.md", "test content"), ("other.txt", "other content")] {
            fs::write(commands_dir.join(name), body).unwrap();
        }

        // A single shell script in its own directory.
        let scripts_dir = root.path().join("scripts");
        fs::create_dir_all(&scripts_dir).unwrap();
        fs::write(scripts_dir.join("script.sh"), "#!/bin/bash").unwrap();

        root
    }

    /// Run a pattern-based walk under `base` and gather the results.
    fn walk_all(config: WalkConfig, base: &Path) -> Vec<PathBuf> {
        DirectoryWalker::new(config).walk(base).collect()
    }

    #[test]
    fn test_walk_with_pattern() {
        let tmp = create_test_dir();

        let found = walk_all(
            WalkConfig::new([".claude/commands"]).with_extensions(&["md"]),
            tmp.path(),
        );

        assert_eq!(found.len(), 1);
        assert!(found[0].ends_with("test.md"));
    }

    #[test]
    fn test_walk_without_extension_filter() {
        let tmp = create_test_dir();

        let found = walk_all(WalkConfig::new([".claude/commands"]), tmp.path());

        // Both test.md and other.txt are returned when no filter is set.
        assert_eq!(found.len(), 2);
    }

    #[test]
    fn test_walk_matches_uppercase_extension() {
        // Regression (#228): `.MD` must match the `md` filter on a
        // case-sensitive filesystem.
        let tmp = TempDir::new().unwrap();
        let commands_dir = tmp.path().join(".claude").join("commands");
        fs::create_dir_all(&commands_dir).unwrap();
        fs::write(commands_dir.join("evil.MD"), "content").unwrap();

        let found = walk_all(
            WalkConfig::new([".claude/commands"]).with_extensions(&["md"]),
            tmp.path(),
        );

        assert_eq!(found.len(), 1, "evil.MD should match the `md` extension");
        assert!(found[0].ends_with("evil.MD"));
    }

    #[test]
    fn test_walk_single() {
        let tmp = create_test_dir();
        let scripts_dir = tmp.path().join("scripts");

        let walker = DirectoryWalker::new(WalkConfig::default().with_extensions(&["sh"]));
        let found: Vec<PathBuf> = walker.walk_single(&scripts_dir).collect();

        assert_eq!(found.len(), 1);
        assert!(found[0].ends_with("script.sh"));
    }

    #[test]
    fn test_walk_nonexistent_pattern() {
        let tmp = create_test_dir();

        let found = walk_all(WalkConfig::new(["nonexistent"]), tmp.path());

        assert!(found.is_empty());
    }

    #[test]
    fn test_walk_with_max_depth() {
        let tmp = create_test_dir();

        // Place a file three levels below the `deep` root.
        let nested_dir = tmp.path().join("deep").join("nested").join("dir");
        fs::create_dir_all(&nested_dir).unwrap();
        fs::write(nested_dir.join("file.md"), "content").unwrap();

        let found = walk_all(WalkConfig::new(["deep"]).with_max_depth(1), tmp.path());

        // Should not find the deeply nested file
        assert!(found.is_empty());
    }
}
253}