1use std::borrow::Cow;
2use std::collections::HashSet;
3use std::ffi::OsString;
4use std::path::Path;
5use std::path::PathBuf;
6
7use globset::Glob;
8use globset::GlobSet;
9use globset::GlobSetBuilder;
10use rayon::prelude::*;
11use walkdir::WalkDir;
12
13use crate::Database;
14use crate::error::DatabaseError;
15use crate::exclusion::Exclusion;
16use crate::file::File;
17use crate::file::FileType;
18use crate::utils::read_file;
19
20pub struct DatabaseLoader {
22 database: Option<Database>,
23 workspace: PathBuf,
24 paths: Vec<PathBuf>,
25 includes: Vec<PathBuf>,
26 excludes: Vec<Exclusion>,
27 memory_sources: Vec<(&'static str, &'static str, FileType)>,
28 extensions: Vec<String>,
29}
30
31impl DatabaseLoader {
32 #[allow(clippy::too_many_arguments)]
37 pub fn new(
38 workspace: PathBuf,
39 paths: Vec<PathBuf>,
40 includes: Vec<PathBuf>,
41 excludes: Vec<Exclusion>,
42 extensions: Vec<String>,
43 ) -> Self {
44 let paths = canonicalize_paths(&workspace, paths);
45 let includes = canonicalize_paths(&workspace, includes);
46
47 let excludes = excludes
48 .into_iter()
49 .filter_map(|exclusion| match exclusion {
50 Exclusion::Path(p) => {
51 let absolute_path = if p.is_absolute() { p } else { workspace.join(p) };
52 match absolute_path.canonicalize() {
53 Ok(canonical_p) => Some(Exclusion::Path(canonical_p)),
54 Err(_) => {
55 tracing::warn!("Ignoring invalid exclusion path: {}", absolute_path.display());
56 None
57 }
58 }
59 }
60 Exclusion::Pattern(pat) => Some(Exclusion::Pattern(pat)),
61 })
62 .collect();
63
64 Self { workspace, paths, includes, excludes, memory_sources: vec![], extensions, database: None }
65 }
66
67 pub fn add_memory_source(&mut self, name: &'static str, contents: &'static str, file_type: FileType) {
77 self.memory_sources.push((name, contents, file_type));
78 }
79
80 pub fn load(mut self) -> Result<Database, DatabaseError> {
82 let mut db = self.database.take().unwrap_or_default();
83 let extensions_set: HashSet<OsString> = self.extensions.iter().map(OsString::from).collect();
84
85 let mut glob_builder = GlobSetBuilder::new();
86 for ex in &self.excludes {
87 if let Exclusion::Pattern(pat) = ex {
88 glob_builder.add(Glob::new(pat)?);
89 }
90 }
91
92 let glob_excludes = glob_builder.build()?;
93
94 let path_excludes: HashSet<_> = self
95 .excludes
96 .iter()
97 .filter_map(|ex| match ex {
98 Exclusion::Path(p) => Some(p),
99 _ => None,
100 })
101 .collect();
102
103 let host_files =
104 self.load_paths(&self.paths, FileType::Host, &extensions_set, &glob_excludes, &path_excludes)?;
105 let vendored_files =
106 self.load_paths(&self.includes, FileType::Vendored, &extensions_set, &glob_excludes, &path_excludes)?;
107
108 for file in host_files.into_iter().chain(vendored_files.into_iter()) {
109 db.add(file);
110 }
111
112 for (name, contents, file_type) in self.memory_sources {
113 let file = File::new(Cow::Borrowed(name), file_type, None, Cow::Borrowed(contents));
114
115 db.add(file);
116 }
117
118 Ok(db)
119 }
120
121 fn load_paths(
123 &self,
124 roots: &[PathBuf],
125 file_type: FileType,
126 extensions: &HashSet<OsString>,
127 glob_excludes: &GlobSet,
128 path_excludes: &HashSet<&PathBuf>,
129 ) -> Result<Vec<File>, DatabaseError> {
130 let mut paths_to_process = Vec::new();
131 for root in roots {
132 for entry in WalkDir::new(root).into_iter().filter_map(Result::ok) {
133 if entry.file_type().is_file() {
134 paths_to_process.push(entry.into_path());
135 }
136 }
137 }
138
139 let files: Vec<File> = paths_to_process
140 .into_par_iter()
141 .filter_map(|path| {
142 if glob_excludes.is_match(&path) {
143 return None;
144 }
145
146 if let Ok(canonical_path) = path.canonicalize()
147 && path_excludes.iter().any(|excluded| canonical_path.starts_with(excluded))
148 {
149 return None;
150 }
151
152 if let Some(ext) = path.extension() {
153 if !extensions.contains(ext) {
154 return None;
155 }
156 } else {
157 return None;
158 }
159
160 match read_file(&self.workspace, &path, file_type) {
161 Ok(file) => Some(Ok(file)),
162 Err(e) => Some(Err(e)),
163 }
164 })
165 .collect::<Result<Vec<File>, _>>()?;
166
167 Ok(files)
168 }
169}
170
171fn canonicalize_paths(workspace: &Path, paths: Vec<PathBuf>) -> Vec<PathBuf> {
176 paths
177 .into_iter()
178 .filter_map(|p| {
179 let absolute_path = if p.is_absolute() { p } else { workspace.join(p) };
180
181 match absolute_path.canonicalize() {
182 Ok(canonical_p) => Some(canonical_p),
183 Err(_) => {
184 tracing::warn!("Ignoring invalid or non-existent path: {}", absolute_path.display());
185 None
186 }
187 }
188 })
189 .collect()
190}