1use std::borrow::Cow;
4use std::collections::HashSet;
5use std::ffi::OsString;
6use std::path::Path;
7
8use globset::Glob;
9use globset::GlobSet;
10use globset::GlobSetBuilder;
11use rayon::prelude::*;
12use walkdir::WalkDir;
13
14use crate::Database;
15use crate::DatabaseConfiguration;
16use crate::error::DatabaseError;
17use crate::exclusion::Exclusion;
18use crate::file::File;
19use crate::file::FileType;
20use crate::utils::read_file;
21
22pub struct DatabaseLoader<'a> {
24 database: Option<Database<'a>>,
25 configuration: &'a DatabaseConfiguration<'a>,
26 memory_sources: Vec<(&'static str, &'static str, FileType)>,
27}
28
29impl<'a> DatabaseLoader<'a> {
30 pub fn new(configuration: &'a DatabaseConfiguration<'a>) -> Self {
31 Self { configuration, memory_sources: vec![], database: None }
32 }
33
34 pub fn with_database(mut self, database: Database<'a>) -> Self {
35 self.database = Some(database);
36 self
37 }
38
39 pub fn add_memory_source(&mut self, name: &'static str, contents: &'static str, file_type: FileType) {
40 self.memory_sources.push((name, contents, file_type));
41 }
42
43 pub fn load(mut self) -> Result<Database<'a>, DatabaseError> {
44 let mut db = self.database.take().unwrap_or_else(|| Database::new(self.configuration.clone()));
45
46 db.configuration = self.configuration.clone();
49
50 let extensions_set: HashSet<OsString> =
51 self.configuration.extensions.iter().map(|s| OsString::from(s.as_ref())).collect();
52
53 let mut glob_builder = GlobSetBuilder::new();
54 for ex in &self.configuration.excludes {
55 if let Exclusion::Pattern(pat) = ex {
56 glob_builder.add(Glob::new(pat)?);
57 }
58 }
59
60 let glob_excludes = glob_builder.build()?;
61
62 let path_excludes: HashSet<_> = self
63 .configuration
64 .excludes
65 .iter()
66 .filter_map(|ex| match ex {
67 Exclusion::Path(p) => Some(p),
68 _ => None,
69 })
70 .collect();
71
72 let host_files = self.load_paths(
73 &self.configuration.paths,
74 FileType::Host,
75 &extensions_set,
76 &glob_excludes,
77 &path_excludes,
78 )?;
79 let vendored_files = self.load_paths(
80 &self.configuration.includes,
81 FileType::Vendored,
82 &extensions_set,
83 &glob_excludes,
84 &path_excludes,
85 )?;
86
87 let mut host_file_ids = HashSet::new();
88
89 for file in host_files {
90 host_file_ids.insert(file.id);
91 db.add(file);
92 }
93
94 for file in vendored_files {
95 if !host_file_ids.contains(&file.id) {
96 db.add(file);
97 }
98 }
99
100 for (name, contents, file_type) in self.memory_sources {
101 let file = File::new(Cow::Borrowed(name), file_type, None, Cow::Borrowed(contents));
102
103 db.add(file);
104 }
105
106 Ok(db)
107 }
108
109 fn load_paths(
111 &self,
112 roots: &[Cow<'a, Path>],
113 file_type: FileType,
114 extensions: &HashSet<OsString>,
115 glob_excludes: &GlobSet,
116 path_excludes: &HashSet<&Cow<'a, Path>>,
117 ) -> Result<Vec<File>, DatabaseError> {
118 let mut paths_to_process = Vec::new();
119 for root in roots {
120 for entry in WalkDir::new(root).into_iter().filter_map(Result::ok) {
121 if entry.file_type().is_file() {
122 paths_to_process.push(entry.into_path());
123 }
124 }
125 }
126
127 let files: Vec<File> = paths_to_process
128 .into_par_iter()
129 .filter_map(|path| {
130 if glob_excludes.is_match(&path) {
131 return None;
132 }
133
134 if let Ok(canonical_path) = path.canonicalize()
135 && path_excludes.iter().any(|excluded| canonical_path.starts_with(excluded))
136 {
137 return None;
138 }
139
140 if let Some(ext) = path.extension() {
141 if !extensions.contains(ext) {
142 return None;
143 }
144 } else {
145 return None;
146 }
147
148 match read_file(self.configuration.workspace.as_ref(), &path, file_type) {
149 Ok(file) => Some(Ok(file)),
150 Err(e) => Some(Err(e)),
151 }
152 })
153 .collect::<Result<Vec<File>, _>>()?;
154
155 Ok(files)
156 }
157}