mago_database/
loader.rs

1use std::borrow::Cow;
2use std::collections::HashSet;
3use std::ffi::OsString;
4use std::path::Path;
5use std::path::PathBuf;
6
7use globset::Glob;
8use globset::GlobSet;
9use globset::GlobSetBuilder;
10use rayon::prelude::*;
11use walkdir::WalkDir;
12
13use crate::Database;
14use crate::error::DatabaseError;
15use crate::exclusion::Exclusion;
16use crate::file::File;
17use crate::file::FileType;
18use crate::utils::read_file;
19
20/// Configures and builds a `Database` by scanning the filesystem and memory.
21pub struct DatabaseLoader {
22    database: Option<Database>,
23    workspace: PathBuf,
24    paths: Vec<PathBuf>,
25    includes: Vec<PathBuf>,
26    excludes: Vec<Exclusion>,
27    memory_sources: Vec<(&'static str, &'static str, FileType)>,
28    extensions: Vec<String>,
29}
30
31impl DatabaseLoader {
32    /// Creates a new loader with the given configuration.
33    ///
34    /// All provided exclusion paths are canonicalized relative to the workspace
35    /// upon creation to ensure they are matched correctly.
36    #[allow(clippy::too_many_arguments)]
37    pub fn new(
38        workspace: PathBuf,
39        paths: Vec<PathBuf>,
40        includes: Vec<PathBuf>,
41        excludes: Vec<Exclusion>,
42        extensions: Vec<String>,
43    ) -> Self {
44        let paths = canonicalize_paths(&workspace, paths);
45        let includes = canonicalize_paths(&workspace, includes);
46
47        let excludes = excludes
48            .into_iter()
49            .filter_map(|exclusion| match exclusion {
50                Exclusion::Path(p) => {
51                    let absolute_path = if p.is_absolute() { p } else { workspace.join(p) };
52                    match absolute_path.canonicalize() {
53                        Ok(canonical_p) => Some(Exclusion::Path(canonical_p)),
54                        Err(_) => {
55                            tracing::warn!("Ignoring invalid exclusion path: {}", absolute_path.display());
56                            None
57                        }
58                    }
59                }
60                Exclusion::Pattern(pat) => Some(Exclusion::Pattern(pat)),
61            })
62            .collect();
63
64        Self { workspace, paths, includes, excludes, memory_sources: vec![], extensions, database: None }
65    }
66
67    /// Adds a memory source to the loader.
68    ///
69    /// This allows you to include files that are not on the filesystem but should be part of the database.
70    ///
71    /// # Arguments
72    ///
73    /// * `name` - The logical name of the file, typically its path relative to the workspace.
74    /// * `contents` - The contents of the file as a string.
75    /// * `file_type` - The type of the file, indicating whether it's a host file or a vendored file.
76    pub fn add_memory_source(&mut self, name: &'static str, contents: &'static str, file_type: FileType) {
77        self.memory_sources.push((name, contents, file_type));
78    }
79
80    /// Scans sources according to the configuration and builds a `Database`.
81    pub fn load(mut self) -> Result<Database, DatabaseError> {
82        let mut db = self.database.take().unwrap_or_default();
83        let extensions_set: HashSet<OsString> = self.extensions.iter().map(OsString::from).collect();
84
85        let mut glob_builder = GlobSetBuilder::new();
86        for ex in &self.excludes {
87            if let Exclusion::Pattern(pat) = ex {
88                glob_builder.add(Glob::new(pat)?);
89            }
90        }
91
92        let glob_excludes = glob_builder.build()?;
93
94        let path_excludes: HashSet<_> = self
95            .excludes
96            .iter()
97            .filter_map(|ex| match ex {
98                Exclusion::Path(p) => Some(p),
99                _ => None,
100            })
101            .collect();
102
103        let host_files =
104            self.load_paths(&self.paths, FileType::Host, &extensions_set, &glob_excludes, &path_excludes)?;
105        let vendored_files =
106            self.load_paths(&self.includes, FileType::Vendored, &extensions_set, &glob_excludes, &path_excludes)?;
107
108        for file in host_files.into_iter().chain(vendored_files.into_iter()) {
109            db.add(file);
110        }
111
112        for (name, contents, file_type) in self.memory_sources {
113            let file = File::new(Cow::Borrowed(name), file_type, None, Cow::Borrowed(contents));
114
115            db.add(file);
116        }
117
118        Ok(db)
119    }
120
121    /// Discovers and reads all files from a set of root paths in parallel.
122    fn load_paths(
123        &self,
124        roots: &[PathBuf],
125        file_type: FileType,
126        extensions: &HashSet<OsString>,
127        glob_excludes: &GlobSet,
128        path_excludes: &HashSet<&PathBuf>,
129    ) -> Result<Vec<File>, DatabaseError> {
130        let mut paths_to_process = Vec::new();
131        for root in roots {
132            for entry in WalkDir::new(root).into_iter().filter_map(Result::ok) {
133                if entry.file_type().is_file() {
134                    paths_to_process.push(entry.into_path());
135                }
136            }
137        }
138
139        let files: Vec<File> = paths_to_process
140            .into_par_iter()
141            .filter_map(|path| {
142                if glob_excludes.is_match(&path) {
143                    return None;
144                }
145
146                if let Ok(canonical_path) = path.canonicalize()
147                    && path_excludes.iter().any(|excluded| canonical_path.starts_with(excluded))
148                {
149                    return None;
150                }
151
152                if let Some(ext) = path.extension() {
153                    if !extensions.contains(ext) {
154                        return None;
155                    }
156                } else {
157                    return None;
158                }
159
160                match read_file(&self.workspace, &path, file_type) {
161                    Ok(file) => Some(Ok(file)),
162                    Err(e) => Some(Err(e)),
163                }
164            })
165            .collect::<Result<Vec<File>, _>>()?;
166
167        Ok(files)
168    }
169}
170
171/// A helper function to canonicalize a vector of paths relative to a workspace.
172///
173/// It handles both absolute and relative paths and logs a warning for any
174/// path that cannot be resolved, filtering it out from the final result.
175fn canonicalize_paths(workspace: &Path, paths: Vec<PathBuf>) -> Vec<PathBuf> {
176    paths
177        .into_iter()
178        .filter_map(|p| {
179            let absolute_path = if p.is_absolute() { p } else { workspace.join(p) };
180
181            match absolute_path.canonicalize() {
182                Ok(canonical_p) => Some(canonical_p),
183                Err(_) => {
184                    tracing::warn!("Ignoring invalid or non-existent path: {}", absolute_path.display());
185                    None
186                }
187            }
188        })
189        .collect()
190}