1use std::borrow::Cow;
4use std::ffi::OsString;
5use std::path::Path;
6
7use ahash::HashMap;
8use ahash::HashSet;
9use globset::Glob;
10use globset::GlobSet;
11use globset::GlobSetBuilder;
12use rayon::prelude::*;
13use walkdir::WalkDir;
14
15use crate::Database;
16use crate::DatabaseConfiguration;
17use crate::error::DatabaseError;
18use crate::exclusion::Exclusion;
19use crate::file::File;
20use crate::file::FileId;
21use crate::file::FileType;
22use crate::utils::read_file;
23
24#[derive(Debug)]
30struct FileWithSpecificity {
31 file: File,
32 specificity: usize,
33}
34
35pub struct DatabaseLoader<'a> {
37 database: Option<Database<'a>>,
38 configuration: DatabaseConfiguration<'a>,
39 memory_sources: Vec<(&'static str, &'static str, FileType)>,
40}
41
42impl<'a> DatabaseLoader<'a> {
43 pub fn new(configuration: DatabaseConfiguration<'a>) -> Self {
44 Self { configuration, memory_sources: vec![], database: None }
45 }
46
47 pub fn with_database(mut self, database: Database<'a>) -> Self {
48 self.database = Some(database);
49 self
50 }
51
52 pub fn add_memory_source(&mut self, name: &'static str, contents: &'static str, file_type: FileType) {
53 self.memory_sources.push((name, contents, file_type));
54 }
55
56 pub fn load(mut self) -> Result<Database<'a>, DatabaseError> {
57 let mut db = self.database.take().unwrap_or_else(|| Database::new(self.configuration.clone()));
58
59 db.configuration = self.configuration.clone();
62
63 let extensions_set: HashSet<OsString> =
64 self.configuration.extensions.iter().map(|s| OsString::from(s.as_ref())).collect();
65
66 let mut glob_builder = GlobSetBuilder::new();
67 for ex in &self.configuration.excludes {
68 if let Exclusion::Pattern(pat) = ex {
69 glob_builder.add(Glob::new(pat)?);
70 }
71 }
72
73 let glob_excludes = glob_builder.build()?;
74
75 let path_excludes: HashSet<_> = self
76 .configuration
77 .excludes
78 .iter()
79 .filter_map(|ex| match ex {
80 Exclusion::Path(p) => Some(p),
81 _ => None,
82 })
83 .collect();
84
85 let host_files_with_spec = self.load_paths(
86 &self.configuration.paths,
87 FileType::Host,
88 &extensions_set,
89 &glob_excludes,
90 &path_excludes,
91 )?;
92 let vendored_files_with_spec = self.load_paths(
93 &self.configuration.includes,
94 FileType::Vendored,
95 &extensions_set,
96 &glob_excludes,
97 &path_excludes,
98 )?;
99
100 let mut all_files: HashMap<FileId, File> = HashMap::default();
101 let mut file_decisions: HashMap<FileId, (FileType, usize)> = HashMap::default();
102
103 for file_with_spec in host_files_with_spec {
105 let file_id = file_with_spec.file.id;
106 let specificity = file_with_spec.specificity;
107
108 all_files.insert(file_id, file_with_spec.file);
109 file_decisions.insert(file_id, (FileType::Host, specificity));
110 }
111
112 for file_with_spec in vendored_files_with_spec {
113 let file_id = file_with_spec.file.id;
114 let vendored_specificity = file_with_spec.specificity;
115
116 all_files.entry(file_id).or_insert(file_with_spec.file);
117
118 match file_decisions.get(&file_id) {
119 Some((FileType::Host, host_specificity)) if vendored_specificity < *host_specificity => {
120 }
122 _ => {
123 file_decisions.insert(file_id, (FileType::Vendored, vendored_specificity));
124 }
125 }
126 }
127
128 for (file_id, (final_type, _)) in file_decisions {
129 if let Some(mut file) = all_files.remove(&file_id) {
130 file.file_type = final_type;
131 db.add(file);
132 }
133 }
134
135 for (name, contents, file_type) in self.memory_sources {
136 let file = File::new(Cow::Borrowed(name), file_type, None, Cow::Borrowed(contents));
137
138 db.add(file);
139 }
140
141 Ok(db)
142 }
143
144 fn load_paths(
152 &self,
153 roots: &[Cow<'a, str>],
154 file_type: FileType,
155 extensions: &HashSet<OsString>,
156 glob_excludes: &GlobSet,
157 path_excludes: &HashSet<&Cow<'a, Path>>,
158 ) -> Result<Vec<FileWithSpecificity>, DatabaseError> {
159 let mut paths_to_process: Vec<(std::path::PathBuf, String, usize)> = Vec::new();
160
161 for root in roots {
162 let is_glob_pattern = root.contains('*') || root.contains('?') || root.contains('[') || root.contains('{');
164
165 let specificity = Self::calculate_pattern_specificity(root.as_ref());
166 if is_glob_pattern {
167 let pattern = if Path::new(root.as_ref()).is_absolute() {
169 root.to_string()
170 } else {
171 self.configuration.workspace.join(root.as_ref()).to_string_lossy().to_string()
173 };
174
175 match glob::glob(&pattern) {
176 Ok(entries) => {
177 for entry in entries {
178 match entry {
179 Ok(path) => {
180 if path.is_file() {
181 paths_to_process.push((path, root.to_string(), specificity));
182 }
183 }
184 Err(e) => {
185 tracing::warn!("Failed to read glob entry: {}", e);
186 }
187 }
188 }
189 }
190 Err(e) => {
191 return Err(DatabaseError::Glob(e.to_string()));
192 }
193 }
194 } else {
195 let dir_path = if Path::new(root.as_ref()).is_absolute() {
197 Path::new(root.as_ref()).to_path_buf()
198 } else {
199 self.configuration.workspace.join(root.as_ref())
200 };
201
202 for entry in WalkDir::new(&dir_path).into_iter().filter_map(Result::ok) {
203 if entry.file_type().is_file() {
204 paths_to_process.push((entry.into_path(), root.to_string(), specificity));
205 }
206 }
207 }
208 }
209
210 let files: Vec<FileWithSpecificity> = paths_to_process
211 .into_par_iter()
212 .filter_map(|(path, _pattern, specificity)| {
213 if glob_excludes.is_match(&path) {
214 return None;
215 }
216
217 if let Ok(canonical_path) = path.canonicalize()
218 && path_excludes.iter().any(|excluded| canonical_path.starts_with(excluded))
219 {
220 return None;
221 }
222
223 if let Some(ext) = path.extension() {
224 if !extensions.contains(ext) {
225 return None;
226 }
227 } else {
228 return None;
229 }
230
231 match read_file(self.configuration.workspace.as_ref(), &path, file_type) {
232 Ok(file) => Some(Ok(FileWithSpecificity { file, specificity })),
233 Err(e) => Some(Err(e)),
234 }
235 })
236 .collect::<Result<Vec<FileWithSpecificity>, _>>()?;
237
238 Ok(files)
239 }
240
241 fn calculate_pattern_specificity(pattern: &str) -> usize {
249 let pattern_path = Path::new(pattern);
250
251 let component_count = pattern_path.components().count();
252 let is_glob = pattern.contains('*') || pattern.contains('?') || pattern.contains('[') || pattern.contains('{');
253
254 if is_glob {
255 let non_wildcard_components = pattern_path
256 .components()
257 .filter(|c| {
258 let s = c.as_os_str().to_string_lossy();
259 !s.contains('*') && !s.contains('?') && !s.contains('[') && !s.contains('{')
260 })
261 .count();
262 non_wildcard_components * 10
263 } else if pattern_path.is_file() || pattern_path.extension().is_some() || pattern.ends_with(".php") {
264 component_count * 1000
265 } else {
266 component_count * 100
267 }
268 }
269}
270
#[cfg(test)]
mod tests {
    use super::*;
    use crate::DatabaseReader;
    use std::borrow::Cow;
    use tempfile::TempDir;

    /// Builds a PHP-only configuration rooted at `temp_dir`, converting the
    /// `/`-separated `paths` and `includes` to the platform's separator.
    fn create_test_config(temp_dir: &TempDir, paths: Vec<&str>, includes: Vec<&str>) -> DatabaseConfiguration<'static> {
        let to_native =
            |entry: &str| -> Cow<'static, str> { Cow::Owned(entry.replace('/', std::path::MAIN_SEPARATOR_STR)) };

        DatabaseConfiguration {
            workspace: Cow::Owned(temp_dir.path().to_path_buf()),
            paths: paths.into_iter().map(to_native).collect(),
            includes: includes.into_iter().map(to_native).collect(),
            excludes: vec![],
            extensions: vec![Cow::Borrowed("php")],
        }
    }

    /// Writes `content` to `relative_path` inside `temp_dir`, creating parent directories as needed.
    fn create_test_file(temp_dir: &TempDir, relative_path: &str, content: &str) {
        let target = temp_dir.path().join(relative_path);
        if let Some(directory) = target.parent() {
            std::fs::create_dir_all(directory).unwrap();
        }
        std::fs::write(target, content).unwrap();
    }

    /// Creates each of `files` (containing `<?php`) in a fresh temporary
    /// workspace and loads it with the given `paths` and `includes`.
    ///
    /// The temporary directory is returned so it outlives the assertions.
    fn load_fixture(files: &[&str], paths: Vec<&str>, includes: Vec<&str>) -> (TempDir, Database<'static>) {
        let temp_dir = TempDir::new().unwrap();
        for relative_path in files {
            create_test_file(&temp_dir, relative_path, "<?php");
        }

        let config = create_test_config(&temp_dir, paths, includes);
        let db = DatabaseLoader::new(config).load().unwrap();

        (temp_dir, db)
    }

    /// Returns the type of the first file in `db` whose name contains `needle`.
    fn file_type_of(db: &Database<'static>, needle: &str) -> FileType {
        db.files().find(|f| f.name.contains(needle)).unwrap().file_type
    }

    #[test]
    fn test_specificity_calculation_exact_file() {
        let score = DatabaseLoader::calculate_pattern_specificity("src/b.php");
        assert!(score >= 2000, "Exact file should have high specificity, got {}", score);
    }

    #[test]
    fn test_specificity_calculation_directory() {
        let score = DatabaseLoader::calculate_pattern_specificity("src/");
        assert!((100..1000).contains(&score), "Directory should have moderate specificity, got {}", score);
    }

    #[test]
    fn test_specificity_calculation_glob() {
        let score = DatabaseLoader::calculate_pattern_specificity("src/*.php");
        assert!(score < 100, "Glob pattern should have low specificity, got {}", score);
    }

    #[test]
    fn test_specificity_calculation_deeper_path() {
        let shallow = DatabaseLoader::calculate_pattern_specificity("src/");
        let deep = DatabaseLoader::calculate_pattern_specificity("src/foo/bar/");
        assert!(deep > shallow, "Deeper path should have higher specificity");
    }

    #[test]
    fn test_exact_file_vs_directory() {
        let (_workspace, db) = load_fixture(&["src/b.php", "src/a.php"], vec!["src/b.php"], vec!["src/"]);

        assert_eq!(
            file_type_of(&db, "b.php"),
            FileType::Host,
            "src/b.php should be Host (exact file beats directory)"
        );
        assert_eq!(file_type_of(&db, "a.php"), FileType::Vendored, "src/a.php should be Vendored");
    }

    #[test]
    fn test_deeper_vs_shallower_directory() {
        let (_workspace, db) = load_fixture(&["src/foo/bar.php"], vec!["src/foo/"], vec!["src/"]);

        assert_eq!(file_type_of(&db, "bar.php"), FileType::Host, "Deeper directory pattern should win");
    }

    #[test]
    fn test_exact_file_vs_glob() {
        let (_workspace, db) = load_fixture(&["src/b.php"], vec!["src/b.php"], vec!["src/*.php"]);

        assert_eq!(file_type_of(&db, "b.php"), FileType::Host, "Exact file should beat glob pattern");
    }

    #[test]
    fn test_equal_specificity_includes_wins() {
        let (_workspace, db) = load_fixture(&["src/a.php"], vec!["src/"], vec!["src/"]);

        assert_eq!(file_type_of(&db, "a.php"), FileType::Vendored, "Equal specificity: includes should win");
    }

    #[test]
    fn test_complex_scenario_from_bug_report() {
        let fixture_files =
            ["src/a.php", "src/b.php", "src/c/d.php", "src/c/e.php", "vendor/lib1.php", "vendor/lib2.php"];
        let (_workspace, db) = load_fixture(&fixture_files, vec!["src/b.php"], vec!["vendor", "src/c", "src/"]);

        let b_type =
            db.files().find(|f| f.name.contains("src/b.php") || f.name.ends_with("b.php")).unwrap().file_type;
        assert_eq!(b_type, FileType::Host, "src/b.php should be Host in bug scenario");

        assert_eq!(file_type_of(&db, "d.php"), FileType::Vendored, "src/c/d.php should be Vendored");
        assert_eq!(file_type_of(&db, "lib1.php"), FileType::Vendored, "vendor/lib1.php should be Vendored");
    }

    #[test]
    fn test_files_only_in_paths() {
        let (_workspace, db) = load_fixture(&["src/a.php"], vec!["src/"], vec![]);

        assert_eq!(file_type_of(&db, "a.php"), FileType::Host, "File only in paths should be Host");
    }

    #[test]
    fn test_files_only_in_includes() {
        let (_workspace, db) = load_fixture(&["vendor/lib.php"], vec![], vec!["vendor/"]);

        assert_eq!(file_type_of(&db, "lib.php"), FileType::Vendored, "File only in includes should be Vendored");
    }
}