mago_database/loader.rs

use std::collections::HashSet;
use std::ffi::OsString;
use std::path::Path;
use std::path::PathBuf;

use globset::Glob;
use globset::GlobSet;
use globset::GlobSetBuilder;
use walkdir::WalkDir;

use crate::Database;
use crate::error::DatabaseError;
use crate::exclusion::Exclusion;
use crate::file::File;
use crate::file::FileType;
use crate::utils::read_file;

/// Configures and builds a `Database` by scanning the filesystem and memory.
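///
/// # Examples
///
/// A minimal usage sketch. The workspace layout, the `"php"` extension, and the
/// `mago_database::{loader, file}` import paths are illustrative assumptions, not
/// guarantees about this crate's public API:
///
/// ```no_run
/// use std::path::PathBuf;
///
/// use mago_database::file::FileType;
/// use mago_database::loader::DatabaseLoader;
///
/// let mut loader = DatabaseLoader::new(
///     PathBuf::from("/project"),               // workspace root
///     vec![PathBuf::from("/project/src")],     // host paths to scan
///     vec![PathBuf::from("/project/vendor")],  // vendored includes
///     vec![],                                  // no exclusions
///     vec!["php".to_string()],                 // accepted extensions
/// );
///
/// // Files that exist only in memory can be registered as well.
/// loader.add_memory_source("stubs/core.php", "<?php // stub", FileType::Host);
///
/// let _database = loader.load().expect("failed to build the database");
/// ```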
pub struct DatabaseLoader<'a> {
    workspace: PathBuf,
    paths: Vec<PathBuf>,
    includes: Vec<PathBuf>,
    excludes: Vec<Exclusion>,
    memory_sources: Vec<(&'a str, &'a str, FileType)>,
    extensions: Vec<String>,
}

impl<'a> DatabaseLoader<'a> {
    /// Creates a new loader with the given configuration.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        workspace: PathBuf,
        paths: Vec<PathBuf>,
        includes: Vec<PathBuf>,
        excludes: Vec<Exclusion>,
        extensions: Vec<String>,
    ) -> Self {
        Self { workspace, paths, includes, excludes, memory_sources: vec![], extensions }
    }

    /// Adds a memory source to the loader.
    ///
    /// This allows you to include files that are not on the filesystem but should be part of the database.
    ///
    /// # Arguments
    ///
    /// * `name` - The logical name of the file, typically its path relative to the workspace.
    /// * `contents` - The contents of the file as a string.
    /// * `file_type` - The type of the file, indicating whether it's a host file or a vendored file.
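    ///
    /// # Examples
    ///
    /// A minimal sketch; the stub name and contents are illustrative, and the hidden
    /// setup assumes the constructor shown in the struct-level example:
    ///
    /// ```no_run
    /// # use std::path::PathBuf;
    /// # use mago_database::file::FileType;
    /// # use mago_database::loader::DatabaseLoader;
    /// # let mut loader = DatabaseLoader::new(PathBuf::from("."), vec![], vec![], vec![], vec!["php".to_string()]);
    /// loader.add_memory_source("stubs/example.php", "<?php function example(): void {}", FileType::Host);
    /// ```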
    pub fn add_memory_source(&mut self, name: &'a str, contents: &'a str, file_type: FileType) {
        self.memory_sources.push((name, contents, file_type));
    }

    /// Scans sources according to the configuration and builds a `Database`.
    ///
    /// This is the main entry point that orchestrates the entire loading process.
    /// It returns a `Result` as some pre-processing, like compiling globs, can fail.
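    ///
    /// # Errors
    ///
    /// Returns a `DatabaseError` if an exclusion pattern is not a valid glob, if the
    /// glob set cannot be built, or if a matched file cannot be read.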
    pub fn load(&self) -> Result<Database, DatabaseError> {
        let mut db = Database::new();

        let extensions_set: HashSet<OsString> = self.extensions.iter().map(OsString::from).collect();

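        // Compile every glob-style exclusion into a single pre-built matcher,
        // so each scanned path is checked with one `is_match` call later on.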
        let mut glob_builder = GlobSetBuilder::new();
        for ex in &self.excludes {
            if let Exclusion::Pattern(pat) = ex {
                glob_builder.add(Glob::new(pat)?);
            }
        }
        let glob_excludes = glob_builder.build()?;

        self.load_paths(&mut db, &self.paths, FileType::Host, &extensions_set, &glob_excludes)?;
        self.load_paths(&mut db, &self.includes, FileType::Vendored, &extensions_set, &glob_excludes)?;

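        // Finally, register the in-memory sources added via `add_memory_source`.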
        for (name, contents, file_type) in &self.memory_sources {
            let file = File::new(name.to_string(), *file_type, None, contents.to_string());

            db.add(file);
        }

        Ok(db)
    }

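    /// Walks every root directory recursively and feeds each regular file it finds
    /// through `process_path`, tagging the resulting entries with `file_type`.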
    fn load_paths(
        &self,
        db: &mut Database,
        roots: &[PathBuf],
        file_type: FileType,
        extensions: &HashSet<OsString>,
        glob_excludes: &GlobSet,
    ) -> Result<(), DatabaseError> {
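        // Canonicalize path-based exclusions once; `process_path` compares each
        // candidate's canonical path against this set.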
        let path_excludes: HashSet<_> = self
            .excludes
            .iter()
            .filter_map(|ex| match ex {
                Exclusion::Path(p) => p.canonicalize().ok(),
                _ => None,
            })
            .collect();

        for root in roots {
            for entry in WalkDir::new(root).into_iter().filter_map(Result::ok) {
                if entry.file_type().is_file() {
                    self.process_path(db, entry.path(), file_type, extensions, glob_excludes, &path_excludes)?;
                }
            }
        }

        Ok(())
    }

    /// The "file processor" step: applies all filters to a single path and, if it
    /// passes, reads the file into the database.
    fn process_path(
        &self,
        db: &mut Database,
        path: &Path,
        file_type: FileType,
        extensions: &HashSet<OsString>,
        glob_excludes: &GlobSet,
        path_excludes: &HashSet<PathBuf>,
    ) -> Result<(), DatabaseError> {
        // Filter 1: Check against pre-compiled glob patterns.
        if glob_excludes.is_match(path) {
            return Ok(());
        }

        // Filter 2: Check against specific paths.
        if let Ok(canonical_path) = path.canonicalize()
            && path_excludes.contains(&canonical_path)
        {
            return Ok(());
        }

        // Filter 3: Check the file extension; skip files whose extension is missing
        // or not in the accepted set.
        match path.extension() {
            Some(ext) if extensions.contains(ext) => {}
            _ => return Ok(()),
        }

        let file = read_file(&self.workspace, path, file_type)?;

        db.add(file);

        Ok(())
    }
}