Skip to main content

codelens_core/walker/
parallel.rs

1//! Parallel directory walker using the `ignore` crate.
2
3use std::path::Path;
4use std::sync::Arc;
5
6use crossbeam_channel::bounded;
7use ignore::{DirEntry, WalkBuilder, WalkState};
8
9use crate::analyzer::stats::FileStats;
10use crate::analyzer::FileAnalyzer;
11use crate::error::Result;
12use crate::filter::Filter;
13
/// Tunable settings consumed by [`ParallelWalker`].
#[derive(Clone, Debug)]
pub struct WalkerConfig {
    /// How many worker threads the underlying walker is asked to use.
    pub threads: usize,
    /// When `true`, symbolic links are followed during traversal.
    pub follow_symlinks: bool,
    /// When `true`, `.gitignore` files are honoured; the walker applies the
    /// same flag to global gitignore rules and git exclude rules.
    pub use_gitignore: bool,
    /// Deepest directory level to descend to; `None` leaves it unbounded.
    pub max_depth: Option<usize>,
    /// Extra ignore entries supplied by the caller.
    pub custom_ignores: Vec<String>,
}
28
29impl Default for WalkerConfig {
30    fn default() -> Self {
31        Self {
32            threads: num_cpus::get(),
33            follow_symlinks: false,
34            use_gitignore: true,
35            max_depth: None,
36            custom_ignores: Vec::new(),
37        }
38    }
39}
40
41/// Parallel directory walker.
42pub struct ParallelWalker {
43    config: WalkerConfig,
44}
45
46impl ParallelWalker {
47    /// Create a new parallel walker.
48    pub fn new(config: WalkerConfig) -> Self {
49        Self { config }
50    }
51
52    /// Walk directories and analyze files in parallel.
53    ///
54    /// Calls `on_file` for each successfully analyzed file,
55    /// and `on_skip` for each skipped file.
56    pub fn walk_and_analyze<F, S>(
57        &self,
58        root: &Path,
59        analyzer: Arc<FileAnalyzer>,
60        filter: Arc<dyn Filter>,
61        mut on_file: F,
62        mut on_skip: S,
63    ) -> Result<()>
64    where
65        F: FnMut(FileStats) + Send,
66        S: FnMut(&Path) + Send,
67    {
68        let (tx, rx) = bounded::<WalkResult>(1000);
69
70        // Build the walker
71        let mut builder = WalkBuilder::new(root);
72        builder
73            .hidden(false) // Don't skip hidden files by default
74            .git_ignore(self.config.use_gitignore)
75            .git_global(self.config.use_gitignore)
76            .git_exclude(self.config.use_gitignore)
77            .follow_links(self.config.follow_symlinks)
78            .threads(self.config.threads);
79
80        if let Some(depth) = self.config.max_depth {
81            builder.max_depth(Some(depth));
82        }
83
84        // Add custom ignore patterns
85        for pattern in &self.config.custom_ignores {
86            builder.add_custom_ignore_filename(pattern);
87        }
88
89        // Start parallel walk
90        let filter_clone = Arc::clone(&filter);
91        let analyzer_clone = Arc::clone(&analyzer);
92
93        builder.build_parallel().run(|| {
94            let tx = tx.clone();
95            let filter = Arc::clone(&filter_clone);
96            let analyzer = Arc::clone(&analyzer_clone);
97
98            Box::new(move |entry: std::result::Result<DirEntry, ignore::Error>| {
99                let entry = match entry {
100                    Ok(e) => e,
101                    Err(_) => return WalkState::Continue,
102                };
103
104                let path = entry.path();
105                let is_dir = entry.file_type().map(|t| t.is_dir()).unwrap_or(false);
106
107                // Apply custom filter
108                if !filter.should_include(path, is_dir) {
109                    if is_dir {
110                        return WalkState::Skip;
111                    }
112                    let _ = tx.send(WalkResult::Skipped(path.to_path_buf()));
113                    return WalkState::Continue;
114                }
115
116                // Skip directories (they're handled by the walker)
117                if is_dir {
118                    return WalkState::Continue;
119                }
120
121                // Skip non-files
122                if !entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
123                    return WalkState::Continue;
124                }
125
126                // Analyze the file
127                match analyzer.analyze(path) {
128                    Ok(Some(stats)) => {
129                        let _ = tx.send(WalkResult::File(stats));
130                    }
131                    Ok(None) => {
132                        let _ = tx.send(WalkResult::Skipped(path.to_path_buf()));
133                    }
134                    Err(_) => {
135                        let _ = tx.send(WalkResult::Skipped(path.to_path_buf()));
136                    }
137                }
138
139                WalkState::Continue
140            })
141        });
142
143        // Close the sender
144        drop(tx);
145
146        // Collect results
147        for result in rx {
148            match result {
149                WalkResult::File(stats) => on_file(stats),
150                WalkResult::Skipped(path) => on_skip(&path),
151            }
152        }
153
154        Ok(())
155    }
156}
157
158impl Default for ParallelWalker {
159    fn default() -> Self {
160        Self::new(WalkerConfig::default())
161    }
162}
163
164/// Result from walking a single entry.
165enum WalkResult {
166    /// Successfully analyzed file.
167    File(FileStats),
168    /// Skipped file.
169    Skipped(std::path::PathBuf),
170}
171
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};
    use tempfile::TempDir;

    /// Filter stub that accepts every path, file or directory.
    struct AllowAll;

    impl Filter for AllowAll {
        fn should_include(&self, _path: &Path, _is_dir: bool) -> bool {
            true
        }
    }

    /// The default configuration uses at least one thread, honours
    /// gitignore rules and does not follow symlinks.
    #[test]
    fn test_walker_config_default() {
        let WalkerConfig {
            threads,
            use_gitignore,
            follow_symlinks,
            ..
        } = WalkerConfig::default();

        assert!(threads > 0);
        assert!(use_gitignore);
        assert!(!follow_symlinks);
    }

    /// Walking an empty directory must not report any analyzed file.
    #[test]
    fn test_walk_empty_dir() {
        let temp = TempDir::new().unwrap();

        let analyzer = {
            let languages = Arc::new(crate::language::LanguageRegistry::empty());
            let settings = crate::config::Config::default();
            Arc::new(FileAnalyzer::new(languages, &settings))
        };

        let analyzed = AtomicUsize::new(0);
        let outcome = ParallelWalker::default().walk_and_analyze(
            temp.path(),
            analyzer,
            Arc::new(AllowAll),
            |_| {
                analyzed.fetch_add(1, Ordering::SeqCst);
            },
            |_| {},
        );
        outcome.unwrap();

        assert_eq!(analyzed.load(Ordering::SeqCst), 0);
    }
}