1use std::borrow::Cow;
4use std::collections::HashSet;
5use std::ffi::OsString;
6use std::path::Path;
7
8use globset::Glob;
9use globset::GlobSet;
10use globset::GlobSetBuilder;
11use rayon::prelude::*;
12use walkdir::WalkDir;
13
14use crate::Database;
15use crate::DatabaseConfiguration;
16use crate::error::DatabaseError;
17use crate::exclusion::Exclusion;
18use crate::file::File;
19use crate::file::FileType;
20use crate::utils::read_file;
21
22pub struct DatabaseLoader<'a> {
24 database: Option<Database<'a>>,
25 configuration: DatabaseConfiguration<'a>,
26 memory_sources: Vec<(&'static str, &'static str, FileType)>,
27}
28
29impl<'a> DatabaseLoader<'a> {
30 pub fn new(configuration: DatabaseConfiguration<'a>) -> Self {
31 Self { configuration, memory_sources: vec![], database: None }
32 }
33
34 pub fn with_database(mut self, database: Database<'a>) -> Self {
35 self.database = Some(database);
36 self
37 }
38
39 pub fn add_memory_source(&mut self, name: &'static str, contents: &'static str, file_type: FileType) {
40 self.memory_sources.push((name, contents, file_type));
41 }
42
43 pub fn load(mut self) -> Result<Database<'a>, DatabaseError> {
44 let mut db = self.database.take().unwrap_or_else(|| Database::new(self.configuration.clone()));
45
46 db.configuration = self.configuration.clone();
49
50 let extensions_set: HashSet<OsString> =
51 self.configuration.extensions.iter().map(|s| OsString::from(s.as_ref())).collect();
52
53 let mut glob_builder = GlobSetBuilder::new();
54 for ex in &self.configuration.excludes {
55 if let Exclusion::Pattern(pat) = ex {
56 glob_builder.add(Glob::new(pat)?);
57 }
58 }
59
60 let glob_excludes = glob_builder.build()?;
61
62 let path_excludes: HashSet<_> = self
63 .configuration
64 .excludes
65 .iter()
66 .filter_map(|ex| match ex {
67 Exclusion::Path(p) => Some(p),
68 _ => None,
69 })
70 .collect();
71
72 let host_files = self.load_paths(
73 &self.configuration.paths,
74 FileType::Host,
75 &extensions_set,
76 &glob_excludes,
77 &path_excludes,
78 )?;
79 let vendored_files = self.load_paths(
80 &self.configuration.includes,
81 FileType::Vendored,
82 &extensions_set,
83 &glob_excludes,
84 &path_excludes,
85 )?;
86
87 let mut vendored_file_ids = HashSet::new();
88
89 for file in vendored_files {
93 vendored_file_ids.insert(file.id);
94 db.add(file);
95 }
96
97 for file in host_files {
99 if !vendored_file_ids.contains(&file.id) {
100 db.add(file);
101 }
102 }
103
104 for (name, contents, file_type) in self.memory_sources {
105 let file = File::new(Cow::Borrowed(name), file_type, None, Cow::Borrowed(contents));
106
107 db.add(file);
108 }
109
110 Ok(db)
111 }
112
113 fn load_paths(
119 &self,
120 roots: &[Cow<'a, str>],
121 file_type: FileType,
122 extensions: &HashSet<OsString>,
123 glob_excludes: &GlobSet,
124 path_excludes: &HashSet<&Cow<'a, Path>>,
125 ) -> Result<Vec<File>, DatabaseError> {
126 let mut paths_to_process = Vec::new();
127
128 for root in roots {
129 let is_glob_pattern = root.contains('*') || root.contains('?') || root.contains('[') || root.contains('{');
131
132 if is_glob_pattern {
133 let pattern = if Path::new(root.as_ref()).is_absolute() {
135 root.to_string()
136 } else {
137 self.configuration.workspace.join(root.as_ref()).to_string_lossy().to_string()
139 };
140
141 match glob::glob(&pattern) {
142 Ok(entries) => {
143 for entry in entries {
144 match entry {
145 Ok(path) => {
146 if path.is_file() {
147 paths_to_process.push(path);
148 }
149 }
150 Err(e) => {
151 tracing::warn!("Failed to read glob entry: {}", e);
152 }
153 }
154 }
155 }
156 Err(e) => {
157 return Err(DatabaseError::Glob(e.to_string()));
158 }
159 }
160 } else {
161 let dir_path = if Path::new(root.as_ref()).is_absolute() {
163 Path::new(root.as_ref()).to_path_buf()
164 } else {
165 self.configuration.workspace.join(root.as_ref())
166 };
167
168 for entry in WalkDir::new(&dir_path).into_iter().filter_map(Result::ok) {
169 if entry.file_type().is_file() {
170 paths_to_process.push(entry.into_path());
171 }
172 }
173 }
174 }
175
176 let files: Vec<File> = paths_to_process
177 .into_par_iter()
178 .filter_map(|path| {
179 if glob_excludes.is_match(&path) {
180 return None;
181 }
182
183 if let Ok(canonical_path) = path.canonicalize()
184 && path_excludes.iter().any(|excluded| canonical_path.starts_with(excluded))
185 {
186 return None;
187 }
188
189 if let Some(ext) = path.extension() {
190 if !extensions.contains(ext) {
191 return None;
192 }
193 } else {
194 return None;
195 }
196
197 match read_file(self.configuration.workspace.as_ref(), &path, file_type) {
198 Ok(file) => Some(Ok(file)),
199 Err(e) => Some(Err(e)),
200 }
201 })
202 .collect::<Result<Vec<File>, _>>()?;
203
204 Ok(files)
205 }
206}