codeowners_validation/validators/
exists.rs

1use crate::parser::CodeOwnerRule;
2use globset::{Glob, GlobSetBuilder};
3use ignore::{DirEntry, WalkBuilder, WalkState};
4use rustc_hash::FxHashMap;
5use std::error::Error;
6use std::path::Path;
7use std::sync::{
8    atomic::{AtomicUsize, Ordering},
9    Arc,
10};
11
12fn build_globset_with_mapping(
13    rules: &[&CodeOwnerRule],
14) -> Result<(globset::GlobSet, FxHashMap<usize, usize>), globset::Error> {
15    let mut builder = GlobSetBuilder::new();
16    let mut globset_idx_to_rule_idx = FxHashMap::default();
17    let mut globset_idx = 0;
18
19    for (rule_idx, rule) in rules.iter().enumerate() {
20        let pattern = &rule.pattern;
21        let is_directory = rule.original_path.ends_with('/');
22        let is_anchored = rule.original_path.starts_with('/');
23
24        match (is_anchored, is_directory) {
25            (true, true) => {
26                // /docs/ → match "docs" and "docs/**"
27                builder.add(Glob::new(pattern)?);
28                globset_idx_to_rule_idx.insert(globset_idx, rule_idx);
29                globset_idx += 1;
30
31                builder.add(Glob::new(&format!("{}/**", pattern))?);
32                globset_idx_to_rule_idx.insert(globset_idx, rule_idx);
33                globset_idx += 1;
34            }
35            (true, false) => {
36                // /src/file.rs → match "src/file.rs" exactly
37                builder.add(Glob::new(pattern)?);
38                globset_idx_to_rule_idx.insert(globset_idx, rule_idx);
39                globset_idx += 1;
40            }
41            (false, true) => {
42                // lib/ → match "**/lib" and "**/lib/**"
43                builder.add(Glob::new(&format!("**/{}", pattern))?);
44                globset_idx_to_rule_idx.insert(globset_idx, rule_idx);
45                globset_idx += 1;
46
47                builder.add(Glob::new(&format!("**/{}/**", pattern))?);
48                globset_idx_to_rule_idx.insert(globset_idx, rule_idx);
49                globset_idx += 1;
50            }
51            (false, false) => {
52                // *.rs → match "**/*.rs" (or just pattern if it's already a glob)
53                if pattern.contains('*') || pattern.contains('?') || pattern.contains('[') {
54                    // Already a wildcard pattern like *.rs, **/*.md
55                    builder.add(Glob::new(pattern)?);
56                    globset_idx_to_rule_idx.insert(globset_idx, rule_idx);
57                    globset_idx += 1;
58                } else {
59                    // Plain file like config.json → match "**/config.json"
60                    builder.add(Glob::new(&format!("**/{}", pattern))?);
61                    globset_idx_to_rule_idx.insert(globset_idx, rule_idx);
62                    globset_idx += 1;
63                }
64            }
65        }
66    }
67
68    Ok((builder.build()?, globset_idx_to_rule_idx))
69}
70
71pub fn validate_directory(
72    repo_path: &Path,
73    rules: &[CodeOwnerRule],
74) -> Result<Vec<CodeOwnerRule>, Box<dyn Error>> {
75    // OPTIMIZATION: Pre-allocate with estimated capacity
76    let estimated_direct = rules.len() / 3;
77    let estimated_wildcard = rules.len() - estimated_direct;
78
79    let mut direct_rules = Vec::with_capacity(estimated_direct);
80    let mut wildcard_rules = Vec::with_capacity(estimated_wildcard);
81
82    // Separate direct and wildcard rules
83    for rule in rules {
84        if rule.pattern.contains('*')
85            || rule.pattern.contains('?')
86            || rule.pattern.contains('[')
87            || rule.pattern.contains(']')
88        {
89            wildcard_rules.push(rule);
90        } else {
91            // Direct paths - but we need to check if they're anchored
92            // Anchored paths are direct checks, non-anchored need glob matching
93            if rule.original_path.starts_with('/') {
94                direct_rules.push(rule);
95            } else {
96                // Non-anchored non-wildcard patterns like "main.rs" need glob matching
97                wildcard_rules.push(rule);
98            }
99        }
100    }
101
102    // Check direct paths (fast path for anchored patterns only)
103    let mut missing = Vec::new();
104    for rule in direct_rules {
105        let path = repo_path.join(&rule.pattern);
106
107        if !path.exists() {
108            missing.push(rule.clone());
109        }
110    }
111
112    if wildcard_rules.is_empty() {
113        return Ok(missing);
114    }
115
116    let (globset, idx_mapping) = build_globset_with_mapping(&wildcard_rules)?;
117    let num_wildcards = wildcard_rules.len();
118
119    // OPTIMIZATION: Use atomic array for lock-free tracking
120    let matched: Arc<Vec<AtomicUsize>> =
121        Arc::new((0..num_wildcards).map(|_| AtomicUsize::new(0)).collect());
122    let remaining = Arc::new(AtomicUsize::new(num_wildcards));
123
124    // OPTIMIZATION: Dynamic thread count based on workload
125    let thread_count = if num_wildcards > 5000 {
126        num_cpus::get().min(8) // More threads for large workloads
127    } else if num_wildcards > 1000 {
128        num_cpus::get().min(4) // Moderate threads
129    } else {
130        2 // Minimal threads for small workloads
131    };
132
133    WalkBuilder::new(repo_path)
134        .standard_filters(false)
135        .hidden(false) // Check hidden files too
136        .git_ignore(false) // Disable for performance
137        .git_global(false)
138        .git_exclude(false)
139        .threads(thread_count)
140        .build_parallel()
141        .run(|| {
142            let globset = globset.clone();
143            let matched = Arc::clone(&matched);
144            let remaining = Arc::clone(&remaining);
145            let idx_mapping = idx_mapping.clone();
146
147            Box::new(move |entry: Result<DirEntry, ignore::Error>| {
148                let dir_entry = match entry {
149                    Ok(de) => de,
150                    Err(_) => return WalkState::Continue,
151                };
152
153                let path = dir_entry.path();
154
155                // Skip .git directory
156                if dir_entry.file_type().is_some_and(|ft| ft.is_dir())
157                    && path.file_name().is_some_and(|name| name == ".git")
158                {
159                    return WalkState::Skip;
160                }
161
162                // OPTIMIZATION: Early exit check
163                if remaining.load(Ordering::Relaxed) == 0 {
164                    return WalkState::Quit;
165                }
166
167                if let Ok(rel_path) = path.strip_prefix(repo_path) {
168                    let matches = globset.matches(rel_path);
169                    if !matches.is_empty() {
170                        for glob_idx in matches {
171                            // Map from globset index to wildcard rule index
172                            if let Some(&rule_idx) = idx_mapping.get(&glob_idx) {
173                                // Only decrement remaining if this is the first match
174                                if matched[rule_idx].fetch_add(1, Ordering::Relaxed) == 0 {
175                                    remaining.fetch_sub(1, Ordering::Relaxed);
176                                }
177                            }
178                        }
179                    }
180                }
181
182                WalkState::Continue
183            })
184        });
185
186    // Collect unmatched wildcard rules
187    for (idx, rule) in wildcard_rules.iter().enumerate() {
188        if matched[idx].load(Ordering::Relaxed) == 0 {
189            missing.push((*rule).clone());
190        }
191    }
192
193    Ok(missing)
194}
195
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::CodeOwnerRule;
    use std::fs;
    use tempfile::tempdir;

    /// Builds a rule owned by `@team`. `pattern` is stored with leading and
    /// trailing slashes stripped; `original` is stored exactly as given.
    fn rule(pattern: &str, original: &str) -> CodeOwnerRule {
        CodeOwnerRule {
            pattern: String::from(pattern.trim_matches('/')),
            original_path: String::from(original),
            owners: vec![String::from("@team")],
        }
    }

    /// Validates a single rule against `root` and returns the rules reported missing.
    fn unmatched(root: &std::path::Path, pattern: &str, original: &str) -> Vec<CodeOwnerRule> {
        validate_directory(root, &[rule(pattern, original)]).unwrap()
    }

    #[test]
    fn detects_missing_file() {
        let repo = tempdir().unwrap();

        let missing = unmatched(repo.path(), "missing.txt", "missing.txt");

        assert_eq!(missing.len(), 1);
        assert_eq!(missing[0].pattern, "missing.txt");
    }

    #[test]
    fn passes_existing_file() {
        let repo = tempdir().unwrap();
        fs::write(repo.path().join("exists.txt"), "content").unwrap();

        assert!(unmatched(repo.path(), "exists.txt", "exists.txt").is_empty());
    }

    #[test]
    fn matches_wildcard_files() {
        let repo = tempdir().unwrap();
        fs::write(repo.path().join("foo.md"), "docs").unwrap();

        assert!(unmatched(repo.path(), "*.md", "*.md").is_empty());
    }

    #[test]
    fn detects_unmatched_wildcards() {
        let repo = tempdir().unwrap();

        assert_eq!(unmatched(repo.path(), "*.xyz", "*.xyz").len(), 1);
    }

    #[test]
    fn handles_anchored_patterns() {
        let repo = tempdir().unwrap();
        // Layout: src/main.rs
        let src = repo.path().join("src");
        fs::create_dir(&src).unwrap();
        fs::write(src.join("main.rs"), "fn main() {}").unwrap();

        // The anchored form is found at its exact location.
        assert!(unmatched(repo.path(), "src/main.rs", "/src/main.rs").is_empty());

        // The bare file name is found wherever it lives.
        assert!(unmatched(repo.path(), "main.rs", "main.rs").is_empty());
    }

    #[test]
    fn handles_directory_patterns() {
        let repo = tempdir().unwrap();
        // Layout: docs/README.md
        let docs = repo.path().join("docs");
        fs::create_dir(&docs).unwrap();
        fs::write(docs.join("README.md"), "# Docs").unwrap();

        // Unanchored directory rule.
        assert!(unmatched(repo.path(), "docs", "docs/").is_empty());

        // Anchored directory rule.
        assert!(unmatched(repo.path(), "docs", "/docs/").is_empty());
    }

    #[test]
    fn handles_nested_patterns() {
        let repo = tempdir().unwrap();
        // Layout: a/b/c/file.txt
        let deep = repo.path().join("a").join("b").join("c");
        fs::create_dir_all(&deep).unwrap();
        fs::write(deep.join("file.txt"), "content").unwrap();

        // An unanchored name matches at any depth.
        assert!(unmatched(repo.path(), "file.txt", "file.txt").is_empty());

        // An anchored name only refers to the repository root, where no such file exists.
        assert_eq!(unmatched(repo.path(), "file.txt", "/file.txt").len(), 1);
    }

    #[test]
    fn handles_complex_wildcards() {
        let repo = tempdir().unwrap();
        // Layout: src/main.test.js
        let src = repo.path().join("src");
        fs::create_dir(&src).unwrap();
        fs::write(src.join("main.test.js"), "test").unwrap();

        // Recursive wildcard combined with a multi-dot suffix.
        assert!(unmatched(repo.path(), "**/*.test.js", "**/*.test.js").is_empty());
    }
}