1use std::borrow::Cow;
2use std::collections::HashSet;
3use std::ffi::OsString;
4use std::path::PathBuf;
5
6use globset::Glob;
7use globset::GlobSet;
8use globset::GlobSetBuilder;
9use rayon::iter::IntoParallelIterator;
10use rayon::iter::ParallelIterator;
11use walkdir::WalkDir;
12
13use crate::Database;
14use crate::error::DatabaseError;
15use crate::exclusion::Exclusion;
16use crate::file::File;
17use crate::file::FileType;
18use crate::utils::read_file;
19
20pub struct DatabaseLoader {
22 database: Option<Database>,
23 workspace: PathBuf,
24 paths: Vec<PathBuf>,
25 includes: Vec<PathBuf>,
26 excludes: Vec<Exclusion>,
27 memory_sources: Vec<(&'static str, &'static str, FileType)>,
28 extensions: Vec<String>,
29}
30
31impl DatabaseLoader {
32 #[allow(clippy::too_many_arguments)]
34 pub fn new(
35 workspace: PathBuf,
36 paths: Vec<PathBuf>,
37 includes: Vec<PathBuf>,
38 excludes: Vec<Exclusion>,
39 extensions: Vec<String>,
40 ) -> Self {
41 Self { workspace, paths, includes, excludes, memory_sources: vec![], extensions, database: None }
42 }
43
44 pub fn add_memory_source(&mut self, name: &'static str, contents: &'static str, file_type: FileType) {
54 self.memory_sources.push((name, contents, file_type));
55 }
56
57 pub fn load(mut self) -> Result<Database, DatabaseError> {
62 let mut db = if let Some(existing_db) = self.database.take() { existing_db } else { Database::new() };
63
64 let extensions_set: HashSet<OsString> = self.extensions.iter().map(OsString::from).collect();
65
66 let mut glob_builder = GlobSetBuilder::new();
67 for ex in &self.excludes {
68 if let Exclusion::Pattern(pat) = ex {
69 glob_builder.add(Glob::new(pat)?);
70 }
71 }
72
73 let glob_excludes = glob_builder.build()?;
74 let host_files = self.load_paths(&self.paths, FileType::Host, &extensions_set, &glob_excludes)?;
75 let vendored_files = self.load_paths(&self.includes, FileType::Vendored, &extensions_set, &glob_excludes)?;
76
77 for file in host_files.into_iter().chain(vendored_files.into_iter()) {
78 db.add(file);
79 }
80
81 for (name, contents, file_type) in self.memory_sources {
82 let file = File::new(Cow::Borrowed(name), file_type, None, Cow::Borrowed(contents));
83
84 db.add(file);
85 }
86
87 Ok(db)
88 }
89
90 fn load_paths(
92 &self,
93 roots: &[PathBuf],
94 file_type: FileType,
95 extensions: &HashSet<OsString>,
96 glob_excludes: &GlobSet,
97 ) -> Result<Vec<File>, DatabaseError> {
98 let path_excludes: HashSet<_> = self
100 .excludes
101 .iter()
102 .filter_map(|ex| match ex {
103 Exclusion::Path(p) => p.canonicalize().ok(),
104 _ => None,
105 })
106 .collect();
107
108 let mut paths_to_process = Vec::new();
109 for root in roots {
110 for entry in WalkDir::new(root).into_iter().filter_map(Result::ok) {
111 if entry.file_type().is_file() {
112 paths_to_process.push(entry.into_path());
113 }
114 }
115 }
116
117 let files: Vec<File> = paths_to_process
119 .into_par_iter() .filter_map(|path| {
121 if glob_excludes.is_match(&path) {
123 return None;
124 }
125 if let Ok(p) = path.canonicalize()
126 && path_excludes.contains(&p)
127 {
128 return None;
129 }
130 if let Some(ext) = path.extension() {
131 if !extensions.contains(ext) {
132 return None;
133 }
134 } else {
135 return None;
136 }
137
138 match read_file(&self.workspace, &path, file_type) {
141 Ok(file) => Some(Ok(file)),
142 Err(e) => Some(Err(e)),
143 }
144 })
145 .collect::<Result<Vec<File>, _>>()?; Ok(files)
148 }
149}