1use std::borrow::Cow;
4use std::ffi::OsString;
5use std::path::Path;
6
7use ahash::HashMap;
8use ahash::HashSet;
9use globset::Glob;
10use globset::GlobSet;
11use globset::GlobSetBuilder;
12use rayon::prelude::*;
13use walkdir::WalkDir;
14
15use crate::Database;
16use crate::DatabaseConfiguration;
17use crate::error::DatabaseError;
18use crate::exclusion::Exclusion;
19use crate::file::File;
20use crate::file::FileId;
21use crate::file::FileType;
22use crate::utils::read_file;
23
/// A loaded [`File`] paired with the specificity score of the root pattern
/// that discovered it.
///
/// The score comes from `DatabaseLoader::calculate_pattern_specificity` and is
/// used by `DatabaseLoader::load` to decide whether a file matched by both a
/// host root and a vendored root ends up as host or vendored.
#[derive(Debug)]
struct FileWithSpecificity {
    /// The file as read from disk.
    file: File,
    /// Specificity score of the root pattern that matched this file.
    specificity: usize,
}
34
/// Builds a [`Database`] from the roots described by a
/// [`DatabaseConfiguration`], plus any registered in-memory sources.
pub struct DatabaseLoader<'a> {
    /// Optional existing database to load into; when absent, `load` creates a
    /// fresh one from `configuration`.
    database: Option<Database<'a>>,
    /// Configuration supplying the workspace, roots, exclusions and extensions.
    configuration: DatabaseConfiguration<'a>,
    /// `(name, contents, file type)` triples added as files at the end of `load`.
    memory_sources: Vec<(&'static str, &'static str, FileType)>,
}
41
42impl<'a> DatabaseLoader<'a> {
43 #[must_use]
44 pub fn new(configuration: DatabaseConfiguration<'a>) -> Self {
45 Self { configuration, memory_sources: vec![], database: None }
46 }
47
48 #[must_use]
49 pub fn with_database(mut self, database: Database<'a>) -> Self {
50 self.database = Some(database);
51 self
52 }
53
54 pub fn add_memory_source(&mut self, name: &'static str, contents: &'static str, file_type: FileType) {
55 self.memory_sources.push((name, contents, file_type));
56 }
57
58 pub fn load(mut self) -> Result<Database<'a>, DatabaseError> {
67 let mut db = self.database.take().unwrap_or_else(|| Database::new(self.configuration.clone()));
68
69 db.configuration = self.configuration.clone();
72
73 let extensions_set: HashSet<OsString> =
74 self.configuration.extensions.iter().map(|s| OsString::from(s.as_ref())).collect();
75
76 let mut glob_builder = GlobSetBuilder::new();
77 for ex in &self.configuration.excludes {
78 if let Exclusion::Pattern(pat) = ex {
79 glob_builder.add(Glob::new(pat)?);
80 }
81 }
82
83 let glob_excludes = glob_builder.build()?;
84
85 let path_excludes: HashSet<_> = self
86 .configuration
87 .excludes
88 .iter()
89 .filter_map(|ex| match ex {
90 Exclusion::Path(p) => Some(p),
91 _ => None,
92 })
93 .collect();
94
95 let host_files_with_spec = self.load_paths(
96 &self.configuration.paths,
97 FileType::Host,
98 &extensions_set,
99 &glob_excludes,
100 &path_excludes,
101 )?;
102 let vendored_files_with_spec = self.load_paths(
103 &self.configuration.includes,
104 FileType::Vendored,
105 &extensions_set,
106 &glob_excludes,
107 &path_excludes,
108 )?;
109
110 let mut all_files: HashMap<FileId, File> = HashMap::default();
111 let mut file_decisions: HashMap<FileId, (FileType, usize)> = HashMap::default();
112
113 for file_with_spec in host_files_with_spec {
115 let file_id = file_with_spec.file.id;
116 let specificity = file_with_spec.specificity;
117
118 all_files.insert(file_id, file_with_spec.file);
119 file_decisions.insert(file_id, (FileType::Host, specificity));
120 }
121
122 for file_with_spec in vendored_files_with_spec {
123 let file_id = file_with_spec.file.id;
124 let vendored_specificity = file_with_spec.specificity;
125
126 all_files.entry(file_id).or_insert(file_with_spec.file);
127
128 match file_decisions.get(&file_id) {
129 Some((FileType::Host, host_specificity)) if vendored_specificity < *host_specificity => {
130 }
132 _ => {
133 file_decisions.insert(file_id, (FileType::Vendored, vendored_specificity));
134 }
135 }
136 }
137
138 for (file_id, (final_type, _)) in file_decisions {
139 if let Some(mut file) = all_files.remove(&file_id) {
140 file.file_type = final_type;
141 db.add(file);
142 }
143 }
144
145 for (name, contents, file_type) in self.memory_sources {
146 let file = File::new(Cow::Borrowed(name), file_type, None, Cow::Borrowed(contents));
147
148 db.add(file);
149 }
150
151 Ok(db)
152 }
153
154 fn load_paths(
162 &self,
163 roots: &[Cow<'a, str>],
164 file_type: FileType,
165 extensions: &HashSet<OsString>,
166 glob_excludes: &GlobSet,
167 path_excludes: &HashSet<&Cow<'a, Path>>,
168 ) -> Result<Vec<FileWithSpecificity>, DatabaseError> {
169 let mut paths_to_process: Vec<(std::path::PathBuf, String, usize)> = Vec::new();
170
171 for root in roots {
172 let is_glob_pattern = root.contains('*') || root.contains('?') || root.contains('[') || root.contains('{');
174
175 let specificity = Self::calculate_pattern_specificity(root.as_ref());
176 if is_glob_pattern {
177 let pattern = if Path::new(root.as_ref()).is_absolute() {
179 root.to_string()
180 } else {
181 self.configuration.workspace.join(root.as_ref()).to_string_lossy().to_string()
183 };
184
185 match glob::glob(&pattern) {
186 Ok(entries) => {
187 for entry in entries {
188 match entry {
189 Ok(path) => {
190 if path.is_file() {
191 paths_to_process.push((path, root.to_string(), specificity));
192 }
193 }
194 Err(e) => {
195 tracing::warn!("Failed to read glob entry: {}", e);
196 }
197 }
198 }
199 }
200 Err(e) => {
201 return Err(DatabaseError::Glob(e.to_string()));
202 }
203 }
204 } else {
205 let dir_path = if Path::new(root.as_ref()).is_absolute() {
207 Path::new(root.as_ref()).to_path_buf()
208 } else {
209 self.configuration.workspace.join(root.as_ref())
210 };
211
212 for entry in WalkDir::new(&dir_path).into_iter().filter_map(Result::ok) {
213 if entry.file_type().is_file() {
214 paths_to_process.push((entry.into_path(), root.to_string(), specificity));
215 }
216 }
217 }
218 }
219
220 let files: Vec<FileWithSpecificity> = paths_to_process
221 .into_par_iter()
222 .filter_map(|(path, _pattern, specificity)| {
223 if glob_excludes.is_match(&path) {
224 return None;
225 }
226
227 if let Ok(canonical_path) = path.canonicalize()
228 && path_excludes.iter().any(|excluded| canonical_path.starts_with(excluded))
229 {
230 return None;
231 }
232
233 if let Some(ext) = path.extension() {
234 if !extensions.contains(ext) {
235 return None;
236 }
237 } else {
238 return None;
239 }
240
241 match read_file(self.configuration.workspace.as_ref(), &path, file_type) {
242 Ok(file) => Some(Ok(FileWithSpecificity { file, specificity })),
243 Err(e) => Some(Err(e)),
244 }
245 })
246 .collect::<Result<Vec<FileWithSpecificity>, _>>()?;
247
248 Ok(files)
249 }
250
/// Scores how specific a root pattern is; a higher score means more specific.
///
/// * Glob patterns (containing `*`, `?`, `[` or `{`) score 10 per path
///   component that contains no glob metacharacter.
/// * Patterns that look like a single file (an existing file, a path with an
///   extension, or anything ending in `.php`) score 1000 per component.
/// * Everything else is treated as a directory and scores 100 per component.
///
/// A redundant leading `./` is not counted, so `./src` and `src` score the
/// same. A bare `.` is left as is and still counts as one component.
fn calculate_pattern_specificity(pattern: &str) -> usize {
    const GLOB_META: [char; 4] = ['*', '?', '[', '{'];

    let pattern_path = Path::new(pattern);

    // A leading `./` adds no information about how narrow the pattern is, so
    // it must not inflate the score relative to the same pattern without it.
    let effective_path = pattern_path
        .strip_prefix(".")
        .ok()
        .filter(|rest| !rest.as_os_str().is_empty())
        .unwrap_or(pattern_path);

    if pattern.contains(GLOB_META) {
        let literal_components = effective_path
            .components()
            .filter(|component| !component.as_os_str().to_string_lossy().contains(GLOB_META))
            .count();

        return literal_components * 10;
    }

    let component_count = effective_path.components().count();
    let looks_like_file =
        pattern_path.is_file() || pattern_path.extension().is_some() || pattern.ends_with(".php");

    if looks_like_file { component_count * 1000 } else { component_count * 100 }
}
279}
280
#[cfg(test)]
mod tests {
    use super::*;
    use crate::DatabaseReader;
    use std::borrow::Cow;
    use tempfile::TempDir;

    /// Builds a configuration rooted at `temp_dir` with the given host `paths`
    /// and vendored `includes`, no exclusions, and `php` as the only extension.
    fn create_test_config(temp_dir: &TempDir, paths: Vec<&str>, includes: Vec<&str>) -> DatabaseConfiguration<'static> {
        // Roots are written with `/` in the tests; use the platform separator on disk.
        let normalize = |s: &str| s.replace('/', std::path::MAIN_SEPARATOR_STR);

        DatabaseConfiguration {
            workspace: Cow::Owned(temp_dir.path().to_path_buf()),
            paths: paths.into_iter().map(|s| Cow::Owned(normalize(s))).collect(),
            includes: includes.into_iter().map(|s| Cow::Owned(normalize(s))).collect(),
            excludes: vec![],
            extensions: vec![Cow::Borrowed("php")],
        }
    }

    /// Writes `content` to `relative_path` under `temp_dir`, creating parent directories.
    fn create_test_file(temp_dir: &TempDir, relative_path: &str, content: &str) {
        let file_path = temp_dir.path().join(relative_path);
        if let Some(parent) = file_path.parent() {
            std::fs::create_dir_all(parent).unwrap();
        }
        std::fs::write(file_path, content).unwrap();
    }

    /// Loads a database from `temp_dir` using the given host `paths` and vendored `includes`.
    fn load_database(temp_dir: &TempDir, paths: Vec<&str>, includes: Vec<&str>) -> Database<'static> {
        DatabaseLoader::new(create_test_config(temp_dir, paths, includes)).load().unwrap()
    }

    /// Returns the type of the first loaded file whose name contains `needle`,
    /// panicking with the needle in the message when no such file was loaded.
    fn file_type_of(db: &Database<'_>, needle: &str) -> FileType {
        db.files()
            .find(|file| file.name.contains(needle))
            .unwrap_or_else(|| panic!("no loaded file has a name containing `{needle}`"))
            .file_type
    }

    #[test]
    fn test_specificity_calculation_exact_file() {
        let spec = DatabaseLoader::calculate_pattern_specificity("src/b.php");
        assert!(spec >= 2000, "Exact file should have high specificity, got {spec}");
    }

    #[test]
    fn test_specificity_calculation_directory() {
        let spec = DatabaseLoader::calculate_pattern_specificity("src/");
        assert!((100..1000).contains(&spec), "Directory should have moderate specificity, got {spec}");
    }

    #[test]
    fn test_specificity_calculation_glob() {
        let spec = DatabaseLoader::calculate_pattern_specificity("src/*.php");
        assert!(spec < 100, "Glob pattern should have low specificity, got {spec}");
    }

    #[test]
    fn test_specificity_calculation_deeper_path() {
        let shallow_spec = DatabaseLoader::calculate_pattern_specificity("src/");
        let deep_spec = DatabaseLoader::calculate_pattern_specificity("src/foo/bar/");
        assert!(deep_spec > shallow_spec, "Deeper path should have higher specificity");
    }

    #[test]
    fn test_exact_file_vs_directory() {
        let temp_dir = TempDir::new().unwrap();

        create_test_file(&temp_dir, "src/b.php", "<?php");
        create_test_file(&temp_dir, "src/a.php", "<?php");

        let db = load_database(&temp_dir, vec!["src/b.php"], vec!["src/"]);

        assert_eq!(
            file_type_of(&db, "b.php"),
            FileType::Host,
            "src/b.php should be Host (exact file beats directory)"
        );
        assert_eq!(file_type_of(&db, "a.php"), FileType::Vendored, "src/a.php should be Vendored");
    }

    #[test]
    fn test_deeper_vs_shallower_directory() {
        let temp_dir = TempDir::new().unwrap();

        create_test_file(&temp_dir, "src/foo/bar.php", "<?php");

        let db = load_database(&temp_dir, vec!["src/foo/"], vec!["src/"]);

        assert_eq!(file_type_of(&db, "bar.php"), FileType::Host, "Deeper directory pattern should win");
    }

    #[test]
    fn test_exact_file_vs_glob() {
        let temp_dir = TempDir::new().unwrap();

        create_test_file(&temp_dir, "src/b.php", "<?php");

        let db = load_database(&temp_dir, vec!["src/b.php"], vec!["src/*.php"]);

        assert_eq!(file_type_of(&db, "b.php"), FileType::Host, "Exact file should beat glob pattern");
    }

    #[test]
    fn test_equal_specificity_includes_wins() {
        let temp_dir = TempDir::new().unwrap();

        create_test_file(&temp_dir, "src/a.php", "<?php");

        let db = load_database(&temp_dir, vec!["src/"], vec!["src/"]);

        assert_eq!(file_type_of(&db, "a.php"), FileType::Vendored, "Equal specificity: includes should win");
    }

    #[test]
    fn test_complex_scenario_from_bug_report() {
        let temp_dir = TempDir::new().unwrap();

        create_test_file(&temp_dir, "src/a.php", "<?php");
        create_test_file(&temp_dir, "src/b.php", "<?php");
        create_test_file(&temp_dir, "src/c/d.php", "<?php");
        create_test_file(&temp_dir, "src/c/e.php", "<?php");
        create_test_file(&temp_dir, "vendor/lib1.php", "<?php");
        create_test_file(&temp_dir, "vendor/lib2.php", "<?php");

        let db = load_database(&temp_dir, vec!["src/b.php"], vec!["vendor", "src/c", "src/"]);

        // Matching on the bare file name keeps the lookup independent of the path separator.
        assert_eq!(file_type_of(&db, "b.php"), FileType::Host, "src/b.php should be Host in bug scenario");
        assert_eq!(file_type_of(&db, "d.php"), FileType::Vendored, "src/c/d.php should be Vendored");
        assert_eq!(file_type_of(&db, "lib1.php"), FileType::Vendored, "vendor/lib1.php should be Vendored");
    }

    #[test]
    fn test_files_only_in_paths() {
        let temp_dir = TempDir::new().unwrap();

        create_test_file(&temp_dir, "src/a.php", "<?php");

        let db = load_database(&temp_dir, vec!["src/"], vec![]);

        assert_eq!(file_type_of(&db, "a.php"), FileType::Host, "File only in paths should be Host");
    }

    #[test]
    fn test_files_only_in_includes() {
        let temp_dir = TempDir::new().unwrap();

        create_test_file(&temp_dir, "vendor/lib.php", "<?php");

        let db = load_database(&temp_dir, vec![], vec!["vendor/"]);

        assert_eq!(file_type_of(&db, "lib.php"), FileType::Vendored, "File only in includes should be Vendored");
    }
}