1use std::borrow::Cow;
2use std::collections::HashSet;
3use std::ffi::OsString;
4use std::path::Path;
5use std::path::PathBuf;
6
7use globset::Glob;
8use globset::GlobSet;
9use globset::GlobSetBuilder;
10use rayon::prelude::*;
11use walkdir::WalkDir;
12
13use crate::Database;
14use crate::error::DatabaseError;
15use crate::exclusion::Exclusion;
16use crate::file::File;
17use crate::file::FileType;
18use crate::utils::read_file;
19
20pub struct DatabaseLoader {
22 database: Option<Database>,
23 workspace: PathBuf,
24 paths: Vec<PathBuf>,
25 includes: Vec<PathBuf>,
26 excludes: Vec<Exclusion>,
27 memory_sources: Vec<(&'static str, &'static str, FileType)>,
28 extensions: Vec<String>,
29}
30
31impl DatabaseLoader {
32 pub fn new(
37 workspace: PathBuf,
38 paths: Vec<PathBuf>,
39 includes: Vec<PathBuf>,
40 excludes: Vec<Exclusion>,
41 extensions: Vec<String>,
42 ) -> Self {
43 let paths = canonicalize_paths(&workspace, paths);
44 let includes = canonicalize_paths(&workspace, includes);
45
46 let excludes = excludes
47 .into_iter()
48 .filter_map(|exclusion| match exclusion {
49 Exclusion::Path(p) => {
50 let absolute_path = if p.is_absolute() { p } else { workspace.join(p) };
51 match absolute_path.canonicalize() {
52 Ok(canonical_p) => Some(Exclusion::Path(canonical_p)),
53 Err(_) => {
54 tracing::warn!("Ignoring invalid exclusion path: {}", absolute_path.display());
55 None
56 }
57 }
58 }
59 Exclusion::Pattern(pat) => Some(Exclusion::Pattern(pat)),
60 })
61 .collect();
62
63 Self { workspace, paths, includes, excludes, memory_sources: vec![], extensions, database: None }
64 }
65
66 pub fn with_database(mut self, database: Database) -> Self {
68 self.database = Some(database);
69 self
70 }
71
72 pub fn add_memory_source(&mut self, name: &'static str, contents: &'static str, file_type: FileType) {
82 self.memory_sources.push((name, contents, file_type));
83 }
84
85 pub fn load(mut self) -> Result<Database, DatabaseError> {
87 let mut db = self.database.take().unwrap_or_default();
88 let extensions_set: HashSet<OsString> = self.extensions.iter().map(OsString::from).collect();
89
90 let mut glob_builder = GlobSetBuilder::new();
91 for ex in &self.excludes {
92 if let Exclusion::Pattern(pat) = ex {
93 glob_builder.add(Glob::new(pat)?);
94 }
95 }
96
97 let glob_excludes = glob_builder.build()?;
98
99 let path_excludes: HashSet<_> = self
100 .excludes
101 .iter()
102 .filter_map(|ex| match ex {
103 Exclusion::Path(p) => Some(p),
104 _ => None,
105 })
106 .collect();
107
108 let host_files =
109 self.load_paths(&self.paths, FileType::Host, &extensions_set, &glob_excludes, &path_excludes)?;
110 let vendored_files =
111 self.load_paths(&self.includes, FileType::Vendored, &extensions_set, &glob_excludes, &path_excludes)?;
112
113 for file in host_files.into_iter().chain(vendored_files.into_iter()) {
114 db.add(file);
115 }
116
117 for (name, contents, file_type) in self.memory_sources {
118 let file = File::new(Cow::Borrowed(name), file_type, None, Cow::Borrowed(contents));
119
120 db.add(file);
121 }
122
123 Ok(db)
124 }
125
126 fn load_paths(
128 &self,
129 roots: &[PathBuf],
130 file_type: FileType,
131 extensions: &HashSet<OsString>,
132 glob_excludes: &GlobSet,
133 path_excludes: &HashSet<&PathBuf>,
134 ) -> Result<Vec<File>, DatabaseError> {
135 let mut paths_to_process = Vec::new();
136 for root in roots {
137 for entry in WalkDir::new(root).into_iter().filter_map(Result::ok) {
138 if entry.file_type().is_file() {
139 paths_to_process.push(entry.into_path());
140 }
141 }
142 }
143
144 let files: Vec<File> = paths_to_process
145 .into_par_iter()
146 .filter_map(|path| {
147 if glob_excludes.is_match(&path) {
148 return None;
149 }
150
151 if let Ok(canonical_path) = path.canonicalize()
152 && path_excludes.iter().any(|excluded| canonical_path.starts_with(excluded))
153 {
154 return None;
155 }
156
157 if let Some(ext) = path.extension() {
158 if !extensions.contains(ext) {
159 return None;
160 }
161 } else {
162 return None;
163 }
164
165 match read_file(&self.workspace, &path, file_type) {
166 Ok(file) => Some(Ok(file)),
167 Err(e) => Some(Err(e)),
168 }
169 })
170 .collect::<Result<Vec<File>, _>>()?;
171
172 Ok(files)
173 }
174}
175
176fn canonicalize_paths(workspace: &Path, paths: Vec<PathBuf>) -> Vec<PathBuf> {
181 paths
182 .into_iter()
183 .filter_map(|p| {
184 let absolute_path = if p.is_absolute() { p } else { workspace.join(p) };
185
186 match absolute_path.canonicalize() {
187 Ok(canonical_p) => Some(canonical_p),
188 Err(_) => {
189 tracing::warn!("Ignoring invalid or non-existent path: {}", absolute_path.display());
190 None
191 }
192 }
193 })
194 .collect()
195}