mago_database/
loader.rs

1use std::borrow::Cow;
2use std::collections::HashSet;
3use std::ffi::OsString;
4use std::path::Path;
5use std::path::PathBuf;
6
7use globset::Glob;
8use globset::GlobSet;
9use globset::GlobSetBuilder;
10use rayon::prelude::*;
11use walkdir::WalkDir;
12
13use crate::Database;
14use crate::error::DatabaseError;
15use crate::exclusion::Exclusion;
16use crate::file::File;
17use crate::file::FileType;
18use crate::utils::read_file;
19
20/// Configures and builds a `Database` by scanning the filesystem and memory.
21pub struct DatabaseLoader {
22    database: Option<Database>,
23    workspace: PathBuf,
24    paths: Vec<PathBuf>,
25    includes: Vec<PathBuf>,
26    excludes: Vec<Exclusion>,
27    memory_sources: Vec<(&'static str, &'static str, FileType)>,
28    extensions: Vec<String>,
29}
30
31impl DatabaseLoader {
32    /// Creates a new loader with the given configuration.
33    ///
34    /// All provided exclusion paths are canonicalized relative to the workspace
35    /// upon creation to ensure they are matched correctly.
36    pub fn new(
37        workspace: PathBuf,
38        paths: Vec<PathBuf>,
39        includes: Vec<PathBuf>,
40        excludes: Vec<Exclusion>,
41        extensions: Vec<String>,
42    ) -> Self {
43        let paths = canonicalize_paths(&workspace, paths);
44        let includes = canonicalize_paths(&workspace, includes);
45
46        let excludes = excludes
47            .into_iter()
48            .filter_map(|exclusion| match exclusion {
49                Exclusion::Path(p) => {
50                    let absolute_path = if p.is_absolute() { p } else { workspace.join(p) };
51                    match absolute_path.canonicalize() {
52                        Ok(canonical_p) => Some(Exclusion::Path(canonical_p)),
53                        Err(_) => {
54                            tracing::warn!("Ignoring invalid exclusion path: {}", absolute_path.display());
55                            None
56                        }
57                    }
58                }
59                Exclusion::Pattern(pat) => Some(Exclusion::Pattern(pat)),
60            })
61            .collect();
62
63        Self { workspace, paths, includes, excludes, memory_sources: vec![], extensions, database: None }
64    }
65
66    /// Sets a pre-existing database to populate.
67    pub fn with_database(mut self, database: Database) -> Self {
68        self.database = Some(database);
69        self
70    }
71
72    /// Adds a memory source to the loader.
73    ///
74    /// This allows you to include files that are not on the filesystem but should be part of the database.
75    ///
76    /// # Arguments
77    ///
78    /// * `name` - The logical name of the file, typically its path relative to the workspace.
79    /// * `contents` - The contents of the file as a string.
80    /// * `file_type` - The type of the file, indicating whether it's a host file or a vendored file.
81    pub fn add_memory_source(&mut self, name: &'static str, contents: &'static str, file_type: FileType) {
82        self.memory_sources.push((name, contents, file_type));
83    }
84
85    /// Scans sources according to the configuration and builds a `Database`.
86    pub fn load(mut self) -> Result<Database, DatabaseError> {
87        let mut db = self.database.take().unwrap_or_default();
88        let extensions_set: HashSet<OsString> = self.extensions.iter().map(OsString::from).collect();
89
90        let mut glob_builder = GlobSetBuilder::new();
91        for ex in &self.excludes {
92            if let Exclusion::Pattern(pat) = ex {
93                glob_builder.add(Glob::new(pat)?);
94            }
95        }
96
97        let glob_excludes = glob_builder.build()?;
98
99        let path_excludes: HashSet<_> = self
100            .excludes
101            .iter()
102            .filter_map(|ex| match ex {
103                Exclusion::Path(p) => Some(p),
104                _ => None,
105            })
106            .collect();
107
108        let host_files =
109            self.load_paths(&self.paths, FileType::Host, &extensions_set, &glob_excludes, &path_excludes)?;
110        let vendored_files =
111            self.load_paths(&self.includes, FileType::Vendored, &extensions_set, &glob_excludes, &path_excludes)?;
112
113        for file in host_files.into_iter().chain(vendored_files.into_iter()) {
114            db.add(file);
115        }
116
117        for (name, contents, file_type) in self.memory_sources {
118            let file = File::new(Cow::Borrowed(name), file_type, None, Cow::Borrowed(contents));
119
120            db.add(file);
121        }
122
123        Ok(db)
124    }
125
126    /// Discovers and reads all files from a set of root paths in parallel.
127    fn load_paths(
128        &self,
129        roots: &[PathBuf],
130        file_type: FileType,
131        extensions: &HashSet<OsString>,
132        glob_excludes: &GlobSet,
133        path_excludes: &HashSet<&PathBuf>,
134    ) -> Result<Vec<File>, DatabaseError> {
135        let mut paths_to_process = Vec::new();
136        for root in roots {
137            for entry in WalkDir::new(root).into_iter().filter_map(Result::ok) {
138                if entry.file_type().is_file() {
139                    paths_to_process.push(entry.into_path());
140                }
141            }
142        }
143
144        let files: Vec<File> = paths_to_process
145            .into_par_iter()
146            .filter_map(|path| {
147                if glob_excludes.is_match(&path) {
148                    return None;
149                }
150
151                if let Ok(canonical_path) = path.canonicalize()
152                    && path_excludes.iter().any(|excluded| canonical_path.starts_with(excluded))
153                {
154                    return None;
155                }
156
157                if let Some(ext) = path.extension() {
158                    if !extensions.contains(ext) {
159                        return None;
160                    }
161                } else {
162                    return None;
163                }
164
165                match read_file(&self.workspace, &path, file_type) {
166                    Ok(file) => Some(Ok(file)),
167                    Err(e) => Some(Err(e)),
168                }
169            })
170            .collect::<Result<Vec<File>, _>>()?;
171
172        Ok(files)
173    }
174}
175
176/// A helper function to canonicalize a vector of paths relative to a workspace.
177///
178/// It handles both absolute and relative paths and logs a warning for any
179/// path that cannot be resolved, filtering it out from the final result.
180fn canonicalize_paths(workspace: &Path, paths: Vec<PathBuf>) -> Vec<PathBuf> {
181    paths
182        .into_iter()
183        .filter_map(|p| {
184            let absolute_path = if p.is_absolute() { p } else { workspace.join(p) };
185
186            match absolute_path.canonicalize() {
187                Ok(canonical_p) => Some(canonical_p),
188                Err(_) => {
189                    tracing::warn!("Ignoring invalid or non-existent path: {}", absolute_path.display());
190                    None
191                }
192            }
193        })
194        .collect()
195}