mago_database/loader.rs

//! Database loader for scanning and loading project files.

use std::borrow::Cow;
use std::collections::HashSet;
use std::ffi::OsString;
use std::path::Path;

use globset::Glob;
use globset::GlobSet;
use globset::GlobSetBuilder;
use rayon::prelude::*;
use walkdir::WalkDir;

use crate::Database;
use crate::DatabaseConfiguration;
use crate::error::DatabaseError;
use crate::exclusion::Exclusion;
use crate::file::File;
use crate::file::FileType;
use crate::utils::read_file;

/// Builder for loading files into a Database from the filesystem and memory.
pub struct DatabaseLoader<'a> {
    database: Option<Database<'a>>,
    configuration: &'a DatabaseConfiguration<'a>,
    memory_sources: Vec<(&'static str, &'static str, FileType)>,
}

impl<'a> DatabaseLoader<'a> {
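    /// Creates a loader for the given configuration, with no pre-existing database or in-memory sources.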
    pub fn new(configuration: &'a DatabaseConfiguration<'a>) -> Self {
        Self { configuration, memory_sources: vec![], database: None }
    }

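    /// Seeds the loader with an existing database; loaded files are merged into it instead of a fresh one.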
    pub fn with_database(mut self, database: Database<'a>) -> Self {
        self.database = Some(database);
        self
    }

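    /// Registers an in-memory source that is added to the database alongside files read from disk.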
    pub fn add_memory_source(&mut self, name: &'static str, contents: &'static str, file_type: FileType) {
        self.memory_sources.push((name, contents, file_type));
    }

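    /// Loads files from the configured paths, includes, and registered in-memory
    /// sources, applying the exclusion rules, and returns the populated database.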
    pub fn load(mut self) -> Result<Database<'a>, DatabaseError> {
        let mut db = self.database.take().unwrap_or_else(|| Database::new(self.configuration.clone()));

        // Update database configuration to use the loader's configuration
        // (fixes workspace path when merging with prelude database)
        db.configuration = self.configuration.clone();

        let extensions_set: HashSet<OsString> =
            self.configuration.extensions.iter().map(|s| OsString::from(s.as_ref())).collect();

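        // Build a glob set from the pattern-based exclusions.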
        let mut glob_builder = GlobSetBuilder::new();
        for ex in &self.configuration.excludes {
            if let Exclusion::Pattern(pat) = ex {
                glob_builder.add(Glob::new(pat)?);
            }
        }

        let glob_excludes = glob_builder.build()?;

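        // Collect the path-based exclusions; they are matched by prefix against canonical paths below.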
        let path_excludes: HashSet<_> = self
            .configuration
            .excludes
            .iter()
            .filter_map(|ex| match ex {
                Exclusion::Path(p) => Some(p),
                _ => None,
            })
            .collect();

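        // Project paths are loaded as host files, included paths as vendored files.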
        let host_files = self.load_paths(
            &self.configuration.paths,
            FileType::Host,
            &extensions_set,
            &glob_excludes,
            &path_excludes,
        )?;
        let vendored_files = self.load_paths(
            &self.configuration.includes,
            FileType::Vendored,
            &extensions_set,
            &glob_excludes,
            &path_excludes,
        )?;

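        // Host files take precedence: remember their IDs so duplicate vendored files are skipped.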
        let mut host_file_ids = HashSet::new();

        for file in host_files {
            host_file_ids.insert(file.id);
            db.add(file);
        }

        for file in vendored_files {
            if !host_file_ids.contains(&file.id) {
                db.add(file);
            }
        }

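        // Finally, add any sources registered through `add_memory_source`.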
        for (name, contents, file_type) in self.memory_sources {
            let file = File::new(Cow::Borrowed(name), file_type, None, Cow::Borrowed(contents));

            db.add(file);
        }

        Ok(db)
    }

    /// Discovers and reads all files from a set of root paths in parallel.
    fn load_paths(
        &self,
        roots: &[Cow<'a, Path>],
        file_type: FileType,
        extensions: &HashSet<OsString>,
        glob_excludes: &GlobSet,
        path_excludes: &HashSet<&Cow<'a, Path>>,
    ) -> Result<Vec<File>, DatabaseError> {
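        // Walk each root sequentially to collect candidate paths; filtering and reading happen in parallel below.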
        let mut paths_to_process = Vec::new();
        for root in roots {
            for entry in WalkDir::new(root).into_iter().filter_map(Result::ok) {
                if entry.file_type().is_file() {
                    paths_to_process.push(entry.into_path());
                }
            }
        }

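        // Filter and read the candidates in parallel, skipping anything matched by an exclusion glob.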
        let files: Vec<File> = paths_to_process
            .into_par_iter()
            .filter_map(|path| {
                if glob_excludes.is_match(&path) {
                    return None;
                }

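                // Skip files that live under an explicitly excluded path, comparing against the canonical path.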
                if let Ok(canonical_path) = path.canonicalize()
                    && path_excludes.iter().any(|excluded| canonical_path.starts_with(excluded))
                {
                    return None;
                }

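                // Keep only files whose extension is in the configured set; files without an extension are skipped.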
                if let Some(ext) = path.extension() {
                    if !extensions.contains(ext) {
                        return None;
                    }
                } else {
                    return None;
                }

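                // Read the file contents, surfacing any error through the collected `Result`.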
                match read_file(self.configuration.workspace.as_ref(), &path, file_type) {
                    Ok(file) => Some(Ok(file)),
                    Err(e) => Some(Err(e)),
                }
            })
            .collect::<Result<Vec<File>, _>>()?;

        Ok(files)
    }
}